require(tidyverse)
require(reshape2)
require(RColorBrewer)
require(ComplexHeatmap)
require(dendextend)
require(circlize)
require(viridis)
require(naniar)
require(pals)
require(PermCor)
require(vegan)
library(gridExtra)
library(data.table)

lmp <- function (modelobject) {
  if (class(modelobject) != "lm") stop("Not an object of class 'lm' ")
  f <- summary(modelobject)$fstatistic
  p <- pf(f[1],f[2],f[3],lower.tail=F)
  attributes(p) <- NULL
  return(p)
}

source("/Users/emifaure/Documents/TONGA/StageMilo/PetB_Corr_Heatmaps_RDA/scoresRDA.R")

1 Load metadata

########## Load metadata ############

meta <- read.table(file = "/Users/emifaure/Documents/ACE/MetaData/ACE_metadata/Metadata_For_Submission/meta_CTD_ForSubmission.csv", 
                   header=TRUE, sep = ";", dec = ".", stringsAsFactors = FALSE)
meta <- meta %>% 
  mutate_if(.predicate = is.character, .funs = as.factor)

# Load list of metagenomes
metagenomes <- read.table("/Users/emifaure/Documents/ACE/Metagenomics/ACEsamples_With_Ace_seq_name.tsv", header = TRUE)

# Load Nickel dataset
Nickel=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/NickelData.txt", sep="\t", header=T, dec=",")
meta_nickel = meta %>% inner_join(Nickel, by=c("TM_station_number","Depth_m")) %>%
  filter(!is.na(Ni_60_58_D_DELTA_BOTTLE))
# 85 ACE_seq_Name have a nickel values, how many in out sequenced metagenomes ?

meta_nickel = meta_nickel[which(meta_nickel$ACE_seq_name %in% metagenomes$ace_seq_name),]
# we have 48 metagenomes that can be associated with an isotopic value

2 Load metagenomics data

########## Load Metagnomics data ############

# CDHit clusters of interest were selected according to their functional annotations:
#  - Annotations containing one of the words "urease", "NiFe", ["superoxide"+"dismutase"] in eggNOG annotation file
#  - Annotations as KEGG ID corresponding to an urease, NiFe hydrgenase or superoxide dismutase

# The relative transformed RLE data (makes sense to sum them, reflect % of mapped reads in sample):
GeneMat = read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/GM_CDHit_relative-trans_MetalloEnzymes.tsv", sep="\t", header=T)

# The sample names are not in ACE_Seq format, we make the conversion :
for (i in 2:ncol(GeneMat)){
  names(GeneMat)[i] <- sub("_SORTED", "", names(GeneMat)[i])
  names(GeneMat)[i] <- metagenomes$ace_seq_name[which(metagenomes$sample==names(GeneMat)[i])]
}
row.names(GeneMat)=GeneMat$CDHit_ID
GeneMat_Nickel = GeneMat[,names(GeneMat) %in% meta_nickel$ACE_seq_name]
GeneMat_Nickel=as.data.frame(t(GeneMat_Nickel))

# Results from the ACE metagenomics catalog showed that abundance patterns can be extremely different across size fractions for one AGC, and it should make more sense to divide between A (>3 micrometers, attached and eukaryotes) and B+C (0.2-3 and 0.2-40 micrometers, which are dominated by prokaryotes).
meta_nickel_FL = meta_nickel[-which(meta_nickel$Size_fraction==">3 µm"),]
meta_nickel_ATT = meta_nickel[which(meta_nickel$Size_fraction==">3 µm"),]
GeneMat_Nickel_FL = GeneMat_Nickel[row.names(GeneMat_Nickel) %in% meta_nickel_FL$ACE_seq_name,]
GeneMat_Nickel_FL=GeneMat_Nickel_FL[match(meta_nickel_FL$ACE_seq_name,row.names(GeneMat_Nickel_FL)),]
#Remove columns of only 0
GeneMat_Nickel_FL = GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL)==0)]
GeneMat_Nickel_ATT = GeneMat_Nickel[row.names(GeneMat_Nickel) %in% meta_nickel_ATT$ACE_seq_name,]
GeneMat_Nickel_ATT=GeneMat_Nickel_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(GeneMat_Nickel_ATT)),]
#Remove columns of only 0
GeneMat_Nickel_ATT = GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT)==0)]

# Load functional annotations for the CD-Hit clusters
AnnotFull=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Annotations_CDHit_Metallo.tsv", sep="\t", header=F,fill=T, quote = '', na.strings =c('','-'))
names(AnnotFull)=c("Gene_ID","AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","CDHit_Rep","Domain","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")
AnnotTransport=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Annotations_CDHit_nickeltransport.tsv", sep="\t", header=F,fill=T, quote = '', na.strings =c('','-'))
names(AnnotTransport)=c("Gene_ID","AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","CDHit_Rep","Domain","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")

3 Broad Functional level investigations

# First we will agreggate the CD-Hit clusters by broad functional type
Func_simplify <- rep("Unclassified",nrow(AnnotFull))
Func_simplify[grepl("urease", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("ure", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("Ure", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03190", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03189", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("K03188", do.call(paste0, AnnotFull), ignore.case=TRUE)]="Urease"
Func_simplify[grepl("NiFe", do.call(paste0, AnnotFull), ignore.case=TRUE)]="NiFe Hydrogenase"
Func_simplify[grepl("hydrogenase", do.call(paste0, AnnotFull), ignore.case=TRUE)]="NiFe Hydrogenase"
Func_simplify[grepl("superoxide", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("sod", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("Sod_", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"
Func_simplify[grepl("K04565", do.call(paste0, AnnotFull), ignore.case=TRUE)]="SOD"

AnnotFull$FuncSimplify <- Func_simplify

Enzyme_IDs=unique(AnnotFull$FuncSimplify)[!is.na(unique(AnnotFull$FuncSimplify))]

#Initiate matrix for FL:
Enzyme_matrix_FL=GeneMat_Nickel_FL[,1:length(Enzyme_IDs)]
#Fill matrix
for (i in c(1:length(Enzyme_IDs))) {
  Enzyme_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$FuncSimplify==Enzyme_IDs[i]),"Gene_ID"]),drop=F])
  names(Enzyme_matrix_FL)[i]=Enzyme_IDs[i]
}

#Initiate matrix for ATT:
Enzyme_matrix_ATT=GeneMat_Nickel_ATT[,1:length(Enzyme_IDs)]
#Fill matrix
for (i in c(1:length(Enzyme_IDs))) {
  Enzyme_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$FuncSimplify==Enzyme_IDs[i]),"Gene_ID"]),drop=F])
  names(Enzyme_matrix_ATT)[i]=Enzyme_IDs[i]
}

# Add the nickel data and metadata
EnzymeFL_Ni = merge(Enzyme_matrix_FL,meta_nickel_FL,by.x="row.names",by.y="ACE_seq_name")
EnzymeATT_Ni = merge(Enzyme_matrix_ATT,meta_nickel_ATT,by.x="row.names",by.y="ACE_seq_name")

# Plots

# FL SF :
EnzymeFL_Ni_plot <- EnzymeFL_Ni %>% select(SOD,Urease,`NiFe Hydrogenase`,Ni_60_58_D_DELTA_BOTTLE,MertzGlacier) %>%
  pivot_longer(-c(Ni_60_58_D_DELTA_BOTTLE,MertzGlacier),names_to = "Enzyme",values_to = "Abundance") %>%
  mutate(Enzyme = fct_relevel(Enzyme, 
                              "NiFe Hydrogenase", "SOD", "Urease"))


ggplot(EnzymeFL_Ni_plot) + 
  geom_smooth(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme),method="lm",alpha=0.2) +
  geom_point(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme,shape=MertzGlacier),size=3) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  labs(y="Metagenomic relative abundance (% of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier") +
  theme(text = element_text(size = 16)) +
  facet_wrap(~Enzyme,scales = "free",nrow = 1)

testFL <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3), coef=rep(0,3), R2=rep(0,3))
for(i in c(1:3)) {
  testFL$pval[i] <- perm_test(EnzymeFL_Ni[,testFL$Enzyme[i]],EnzymeFL_Ni$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
  res=lm(EnzymeFL_Ni[,testFL$Enzyme[i]]~EnzymeFL_Ni$Ni_60_58_D_DELTA_BOTTLE)
  testFL$R2[i]=summary(res)$adj.r.squared
  testFL$coef[i]=res$coefficients[2]
}
testFL$pvalspear.adj=p.adjust(testFL$pval, method="BH")
testFL
##             Enzyme  pval          coef           R2 pvalspear.adj
## 1              SOD 0.313  9.143826e-05  0.045527667         0.313
## 2           Urease 0.192 -3.051448e-04 -0.001883718         0.288
## 3 NiFe Hydrogenase 0.130 -6.195455e-05 -0.016610441         0.288
# Try without Mertz :
EnzymeFL_Ni_nomertz <- EnzymeFL_Ni[-which(EnzymeFL_Ni$MertzGlacier==TRUE),]
testFL <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3))
for(i in c(1:3)) {
  testFL$pval[i] <- perm_test(EnzymeFL_Ni_nomertz[,testFL$Enzyme[i]],EnzymeFL_Ni_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
}
testFL$pvalspear.adj=p.adjust(testFL$pval, method="BH")
testFL
##             Enzyme  pval pvalspear.adj
## 1              SOD 0.857         0.857
## 2           Urease 0.079         0.237
## 3 NiFe Hydrogenase 0.422         0.633
# ATT SF :
EnzymeATT_Ni_plot <- EnzymeATT_Ni %>% select(SOD,Urease,`NiFe Hydrogenase`,Ni_60_58_D_DELTA_BOTTLE,MertzGlacier) %>%
  pivot_longer(-c(Ni_60_58_D_DELTA_BOTTLE,MertzGlacier),names_to = "Enzyme",values_to = "Abundance") %>%
  mutate(Enzyme = fct_relevel(Enzyme, 
                              "NiFe Hydrogenase", "SOD", "Urease"))


ggplot(EnzymeATT_Ni_plot) + 
  geom_smooth(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme),method="lm",alpha=0.2) +
  geom_point(aes(x=Ni_60_58_D_DELTA_BOTTLE,y=Abundance*100,col=Enzyme,shape=MertzGlacier),size=3) +
  scale_color_brewer(palette = "Dark2") +
  theme_bw() +
  labs(y="Metagenomic relative abundance (% of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier") +
  theme(text = element_text(size = 16)) +
  facet_wrap(~Enzyme,scales = "free",nrow=1)

#ggsave("Documents/ACE/NolwennCollab/Post_Review/Figure3_PanelB_PostReview_relab_V2.pdf",width = 40, height = 15, units = "cm", device = cairo_pdf)

testATT <- data.frame(Enzyme=c("SOD","Urease","NiFe Hydrogenase"),pval=rep(0,3), coef=rep(0,3), R2=rep(0,3))
for(i in c(1:3)) {
  testATT$pval[i] <- perm_test(EnzymeATT_Ni[,testATT$Enzyme[i]],EnzymeATT_Ni$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided", B=1000)$p.value
  res=lm(EnzymeATT_Ni[,testATT$Enzyme[i]]~EnzymeATT_Ni$Ni_60_58_D_DELTA_BOTTLE)
  testATT$R2[i]=summary(res)$adj.r.squared
  testATT$coef[i]=res$coefficients[2]
}
testATT$pvalspear.adj=p.adjust(testATT$pval, method="BH")
testATT
##             Enzyme  pval          coef          R2 pvalspear.adj
## 1              SOD 0.830  9.350593e-06 -0.06218151        0.8300
## 2           Urease 0.105  1.428125e-04  0.08866117        0.2565
## 3 NiFe Hydrogenase 0.171 -1.021772e-05 -0.01944228        0.2565

4 KEGG Functional level investigations

KEGG_IDs=unique(AnnotFull$KEGG)[!is.na(unique(AnnotFull$KEGG))]

#Initiate matrix :
KEGG_matrix_FL=GeneMat_Nickel_FL[,1:length(KEGG_IDs)]
#Fill matrix
for (i in c(1:length(KEGG_IDs))) {
  KEGG_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$KEGG==KEGG_IDs[i]),"Gene_ID"]),drop=F])
  names(KEGG_matrix_FL)[i]=KEGG_IDs[i]
}
KEGG_matrix_FL=KEGG_matrix_FL[,-which(colSums(KEGG_matrix_FL)==0)]

#Initiate matrix :
KEGG_matrix_ATT=GeneMat_Nickel_ATT[,1:length(KEGG_IDs)]
#Fill matrix
for (i in c(1:length(KEGG_IDs))) {
  KEGG_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$KEGG==KEGG_IDs[i]),"Gene_ID"]), drop=F])
  names(KEGG_matrix_ATT)[i]=KEGG_IDs[i]
}
KEGG_matrix_ATT=KEGG_matrix_ATT[,-which(colSums(KEGG_matrix_ATT)==0)]

# FL size fraction

Results_lm_FL=data.frame(KEGG_ID=c("KEGGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
KEGG_matrix_FL=KEGG_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(KEGG_matrix_FL)),]

for (i in c(1:ncol(KEGG_matrix_FL))) {
  res=lm(KEGG_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(KEGG_matrix_FL)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(KEGG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(KEGG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$KEGG_ID=as.factor(Results_lm_FL$KEGG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
##           KEGG_ID      R2adj               Coef                pvallm         
##  K03187       :1   Min.   :-0.03571   Min.   :-6.525e-05   Min.   :0.0004418  
##  K03188       :1   1st Qu.:-0.03502   1st Qu.:-1.002e-06   1st Qu.:0.2350029  
##  K03189       :1   Median :-0.00844   Median :-1.245e-07   Median :0.3915798  
##  K03190       :1   Mean   : 0.03547   Mean   :-5.374e-06   Mean   :0.5079253  
##  K03190,K03188:1   3rd Qu.: 0.01607   3rd Qu.: 2.020e-06   3rd Qu.:0.8917667  
##  K03192       :1   Max.   : 0.33866   Max.   : 2.389e-05   Max.   :0.9941537  
##  (Other)      :9                                                              
##     corspear          pvalspear        pvallm.adj       pvalspear.adj   
##  Min.   :-0.56021   Min.   :0.0000   Min.   :0.006628   Min.   :0.0000  
##  1st Qu.:-0.17482   1st Qu.:0.1010   1st Qu.:0.721413   1st Qu.:0.3330  
##  Median :-0.06106   Median :0.4840   Median :0.734212   Median :0.8561  
##  Mean   :-0.05216   Mean   :0.4163   Mean   :0.721312   Mean   :0.6002  
##  3rd Qu.: 0.09996   3rd Qu.:0.7085   3rd Qu.:0.994154   3rd Qu.:0.8561  
##  Max.   : 0.42897   Max.   :0.9240   Max.   :0.994154   Max.   :0.9240  
## 
# Nothing worth going further

# ATT size fraction

Results_lm_ATT=data.frame(KEGG_ID=c("KEGGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0),
                          corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
KEGG_matrix_ATT=KEGG_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(KEGG_matrix_ATT)),]

for (i in c(1:ncol(KEGG_matrix_ATT))) {
  res=lm(KEGG_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(KEGG_matrix_ATT)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(KEGG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(KEGG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$KEGG_ID=as.factor(Results_lm_ATT$KEGG_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
##           KEGG_ID      R2adj               Coef                pvallm       
##  K03187       :1   Min.   :-0.06242   Min.   :-8.619e-06   Min.   :0.02811  
##  K03188       :1   1st Qu.:-0.03605   1st Qu.:-1.208e-06   1st Qu.:0.12093  
##  K03188,K03190:1   Median : 0.05973   Median :-3.780e-07   Median :0.16855  
##  K03189       :1   Mean   : 0.04839   Mean   : 6.287e-06   Mean   :0.31483  
##  K03190       :1   3rd Qu.: 0.09009   3rd Qu.: 1.436e-05   3rd Qu.:0.53176  
##  K03190,K03188:1   Max.   : 0.22120   Max.   : 3.738e-05   Max.   :0.97214  
##  (Other)      :7                                                            
##     corspear          pvalspear        pvallm.adj     pvalspear.adj   
##  Min.   :-0.46365   Min.   :0.0000   Min.   :0.2916   Min.   :0.0000  
##  1st Qu.:-0.29943   1st Qu.:0.0910   1st Qu.:0.2916   1st Qu.:0.2232  
##  Median : 0.04182   Median :0.1350   Median :0.2916   Median :0.2507  
##  Mean   : 0.05470   Mean   :0.3202   Mean   :0.4533   Mean   :0.4285  
##  3rd Qu.: 0.37242   3rd Qu.:0.3710   3rd Qu.:0.6913   3rd Qu.:0.4823  
##  Max.   : 0.49690   Max.   :0.9770   Max.   :0.9721   Max.   :0.9770  
## 
Results_lm_ATT[Results_lm_ATT$pvalspear.adj<0.05,] # This KO is absent everywhere but one sample
##          KEGG_ID      R2adj          Coef   pvallm   corspear pvalspear
## 11 K03190,K03188 0.06822015 -4.074998e-07 0.153546 -0.3975649         0
##    pvallm.adj pvalspear.adj
## 11  0.2915945             0
# Nothing worth going further

5 EggNOG Functional level investigations

EggNOG_IDs=unique(AnnotFull$best_OG_desc)[!is.na(unique(AnnotFull$best_OG_desc))]

#Initiate matrix :
EggNOG_matrix_FL=GeneMat_Nickel_FL[,1:length(EggNOG_IDs)]
#Fill matrix
for (i in c(1:length(EggNOG_IDs))) {
  EggNOG_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$best_OG_desc==EggNOG_IDs[i]),"Gene_ID"]),drop=F])
  names(EggNOG_matrix_FL)[i]=EggNOG_IDs[i]
}
EggNOG_matrix_FL=EggNOG_matrix_FL[,-which(colSums(EggNOG_matrix_FL)==0)]
#

#Initiate matrix :
EggNOG_matrix_ATT=GeneMat_Nickel_ATT[,1:length(EggNOG_IDs)]
#Fill matrix
for (i in c(1:length(EggNOG_IDs))) {
  EggNOG_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$best_OG_desc==EggNOG_IDs[i]),"Gene_ID"]), drop=F])
  names(EggNOG_matrix_ATT)[i]=EggNOG_IDs[i]
}
EggNOG_matrix_ATT=EggNOG_matrix_ATT[,-which(colSums(EggNOG_matrix_ATT)==0)]

# FL size fraction

Results_lm_FL=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
EggNOG_matrix_FL=EggNOG_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(EggNOG_matrix_FL)),]

for (i in c(1:ncol(EggNOG_matrix_FL))) {
  res=lm(EggNOG_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(EggNOG_matrix_FL)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(EggNOG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(EggNOG_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$EggNOG_ID=as.factor(Results_lm_FL$EggNOG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
##                                                                                                                                                                                                                                                                                                                                                                                          EggNOG_ID 
##  (urease) gamma subunit                                                                                                                                                                                                                                                                                                                                                                       : 1  
##  [NiFe]-hydrogenase assembly, chaperone, HybE                                                                                                                                                                                                                                                                                                                                                 : 1  
##  age-dependent response to reactive oxygen species                                                                                                                                                                                                                                                                                                                                            : 1  
##  Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1  
##  Belongs to the carbamoyltransferase HypF family                                                                                                                                                                                                                                                                                                                                              : 1  
##  Belongs to the membrane fusion protein (MFP) (TC 8.A.1) family / 4 iron, 4 sulfur cluster binding / NADH dehydrogenase                                                                                                                                                                                                                                                                       : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                      :72  
##      R2adj               Coef                pvallm            corspear       
##  Min.   :-0.03569   Min.   :-9.082e-05   Min.   :0.001083   Min.   :-0.65411  
##  1st Qu.:-0.03144   1st Qu.:-3.558e-06   1st Qu.:0.109336   1st Qu.:-0.29824  
##  Median :-0.00610   Median :-5.484e-08   Median :0.372553   Median :-0.10745  
##  Mean   : 0.03015   Mean   :-3.534e-06   Mean   :0.417583   Mean   :-0.09439  
##  3rd Qu.: 0.05647   3rd Qu.: 2.546e-07   3rd Qu.:0.735957   3rd Qu.: 0.09142  
##  Max.   : 0.29740   Max.   : 7.691e-05   Max.   :0.979386   Max.   : 0.59276  
##                                                                               
##    pvalspear        pvallm.adj      pvalspear.adj   
##  Min.   :0.0000   Min.   :0.04261   Min.   :0.0000  
##  1st Qu.:0.0325   1st Qu.:0.41184   1st Qu.:0.1241  
##  Median :0.2940   Median :0.71348   Median :0.5805  
##  Mean   :0.3643   Mean   :0.64521   Mean   :0.5282  
##  3rd Qu.:0.6265   3rd Qu.:0.93972   3rd Qu.:0.8289  
##  Max.   :0.9950   Max.   :0.97939   Max.   :0.9950  
## 
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #14 significant according to spearman pre-correction, highest coeff = SOD
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            EggNOG_ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       COG2370 Hydrogenase urease accessory protein
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      UreD urease accessory protein
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     superoxide dismutase copper chaperone activity
## 4                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            g t u mismatch-specific dna glycosylase
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               Superoxide dismutase
## 6  NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient / NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 7                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Respiratory-chain NADH dehydrogenase, 49 Kd subunit
## 8                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          COG0378 Ni2 -binding GTPase involved in regulation of expression and maturation of urease and hydrogenase
## 9                                                                                                                                                                                                                                                                                                                                                                                                                                            NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 10                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                COG2032 Cu Zn superoxide dismutase
## 11                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       NAD binding
## 12                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         CobW/HypB/UreG, nucleotide-binding domain
## 13                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Urease accessory protein
## 14                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            (urease) gamma subunit
## 15                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   radicals which are normally produced within the cells and which are toxic to biological systems
## 16                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   DNA glycosylase
## 17                                                                                                                                                                                                                                                                                                                                                                                                                                        NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be a menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 18                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              COG0852 NADH ubiquinone oxidoreductase 27 kD subunit
## 19                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       regulation of superoxide dismutase activity
## 20                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              UreF
## 21                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             nickel cation binding
## 22                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  UreE urease accessory protein, N-terminal domain
## 23                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      enzyme active site formation
##           R2adj          Coef      pvallm   corspear pvalspear pvallm.adj
## 1   0.195043288  1.264936e-05 0.008450685  0.5927585     0.004 0.08106755
## 2   0.297398683  6.890656e-06 0.001083242  0.4397153     0.023 0.04260634
## 3   0.051670801  1.716544e-06 0.119435451  0.4364991     0.027 0.41184110
## 4   0.085844436  9.301425e-08 0.063848818  0.4180726     0.044 0.31126299
## 5   0.024419948  3.403543e-05 0.199597636  0.4158223     0.035 0.57661539
## 6  -0.013174211 -1.704886e-07 0.436598463 -0.3614642     0.016 0.72482144
## 7   0.022349642 -1.820982e-07 0.207755361 -0.3630656     0.017 0.57874708
## 8   0.001098142 -1.875359e-07 0.318415030 -0.3705649     0.029 0.70213612
## 9   0.006558944 -1.134318e-05 0.284338061 -0.4133322     0.034 0.70213612
## 10  0.072213511 -5.418907e-07 0.081877155 -0.4167865     0.013 0.35480101
## 11  0.207671144 -2.877692e-05 0.006629777 -0.4169510     0.010 0.08106755
## 12  0.202886917 -1.019447e-05 0.007270409 -0.4217123     0.032 0.08106755
## 13 -0.009172066 -8.230703e-06 0.398094262 -0.4443209     0.002 0.72482144
## 14  0.087794733 -9.082360e-05 0.061619842 -0.4469158     0.007 0.31126299
## 15  0.116787439 -1.951891e-05 0.036320384 -0.4577165     0.008 0.25754454
## 16  0.060399273 -1.853228e-05 0.101672825 -0.4913712     0.017 0.41184110
## 17  0.057222607 -3.991676e-06 0.107795538 -0.4938219     0.011 0.41184110
## 18  0.243158446 -5.924054e-06 0.003302969 -0.4978225     0.006 0.06440790
## 19  0.291122059 -5.857010e-07 0.001236706 -0.5209488     0.011 0.04260634
## 20  0.098427002 -8.033860e-05 0.050770311 -0.5241337     0.004 0.30462187
## 21  0.200256225 -4.804867e-05 0.007647473 -0.5381623     0.000 0.08106755
## 22  0.089787774 -8.706360e-05 0.059422730 -0.5509256     0.001 0.31126299
## 23  0.277635619 -2.777215e-05 0.001638705 -0.6541127     0.003 0.04260634
##    pvalspear.adj
## 1     0.05200000
## 2     0.10552941
## 3     0.11700000
## 4     0.14921739
## 5     0.12409091
## 6     0.08287500
## 7     0.08287500
## 8     0.11905263
## 9     0.12409091
## 10    0.07800000
## 11    0.07150000
## 12    0.12409091
## 13    0.05200000
## 14    0.06825000
## 15    0.06933333
## 16    0.08287500
## 17    0.07150000
## 18    0.06685714
## 19    0.07150000
## 20    0.05200000
## 21    0.00000000
## 22    0.03900000
## 23    0.05200000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # Only 5 have coeff >0 and one passes the adjustment
##                                        EggNOG_ID      R2adj         Coef
## 1   COG2370 Hydrogenase urease accessory protein 0.19504329 1.264936e-05
## 2                  UreD urease accessory protein 0.29739868 6.890656e-06
## 3 superoxide dismutase copper chaperone activity 0.05167080 1.716544e-06
## 4        g t u mismatch-specific dna glycosylase 0.08584444 9.301425e-08
## 5                           Superoxide dismutase 0.02441995 3.403543e-05
##        pvallm  corspear pvalspear pvallm.adj pvalspear.adj
## 1 0.008450685 0.5927585     0.004 0.08106755     0.0520000
## 2 0.001083242 0.4397153     0.023 0.04260634     0.1055294
## 3 0.119435451 0.4364991     0.027 0.41184110     0.1170000
## 4 0.063848818 0.4180726     0.044 0.31126299     0.1492174
## 5 0.199597636 0.4158223     0.035 0.57661539     0.1240909
ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="COG2370 Hydrogenase urease accessory protein", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"],x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="Superoxide dismutase", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# Again, mertz seems to disturb the relationship. Let's try without it :
Results_lm_FL=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
meta_nickel_FL_nomertz<-meta_nickel_FL[-which(meta_nickel_FL$MertzGlacier==TRUE),]
EggNOG_matrix_FL_nomertz=EggNOG_matrix_FL[match(meta_nickel_FL_nomertz$ACE_seq_name,row.names(EggNOG_matrix_FL)),]

for (i in c(1:ncol(EggNOG_matrix_FL_nomertz))) {
  res=lm(EggNOG_matrix_FL_nomertz[,i]~meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(EggNOG_matrix_FL_nomertz)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(EggNOG_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(EggNOG_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$EggNOG_ID=as.factor(Results_lm_FL$EggNOG_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
##                                                                                                                                                                                                                                                                                                                                                                                          EggNOG_ID 
##  (urease) gamma subunit                                                                                                                                                                                                                                                                                                                                                                       : 1  
##  [NiFe]-hydrogenase assembly, chaperone, HybE                                                                                                                                                                                                                                                                                                                                                 : 1  
##  age-dependent response to reactive oxygen species                                                                                                                                                                                                                                                                                                                                            : 1  
##  Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1  
##  Belongs to the carbamoyltransferase HypF family                                                                                                                                                                                                                                                                                                                                              : 1  
##  Belongs to the membrane fusion protein (MFP) (TC 8.A.1) family / 4 iron, 4 sulfur cluster binding / NADH dehydrogenase                                                                                                                                                                                                                                                                       : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                      :72  
##      R2adj                Coef                pvallm         
##  Min.   :-0.039996   Min.   :-1.227e-04   Min.   :0.0004554  
##  1st Qu.:-0.036735   1st Qu.:-2.952e-06   1st Qu.:0.0905450  
##  Median :-0.008351   Median :-3.592e-08   Median :0.3841547  
##  Mean   : 0.036302   Mean   :-7.624e-06   Mean   :0.4331391  
##  3rd Qu.: 0.074792   3rd Qu.: 2.450e-07   3rd Qu.:0.7816091  
##  Max.   : 0.369902   Max.   : 7.468e-05   Max.   :0.9925182  
##                                                              
##     corspear          pvalspear         pvallm.adj      pvalspear.adj   
##  Min.   :-0.62271   Min.   :0.00000   Min.   :0.03383   Min.   :0.0000  
##  1st Qu.:-0.32283   1st Qu.:0.02725   1st Qu.:0.34856   1st Qu.:0.1040  
##  Median :-0.10804   Median :0.13650   Median :0.74911   Median :0.2691  
##  Mean   :-0.05991   Mean   :0.29788   Mean   :0.64544   Mean   :0.4106  
##  3rd Qu.: 0.11287   3rd Qu.:0.58175   3rd Qu.:0.97662   3rd Qu.:0.7536  
##  Max.   : 0.74989   Max.   :0.99000   Max.   :0.99252   Max.   :0.9900  
## 
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #17 significant according to spearman pre-correction, highest coeff = SOD
##                                                                                                                                                                                                                                                                                                                                                                                                                                     EggNOG_ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                                      uracil-DNA glycosylase
## 2                                                                                                                                                                                                                                                                                                                                                                                                                        Superoxide dismutase
## 3                                                                                                                                                                                                                                                                                                                                                                                                COG2370 Hydrogenase urease accessory protein
## 4                                                                                                                                                                                                                                                                                                                                                                                              superoxide dismutase copper chaperone activity
## 5                                                                                                                                                                                                                                                                                                                                                                                                     g t u mismatch-specific dna glycosylase
## 6                                                                                                                                                                                                                                                                                                                                                                                                   Copper chaperone for superoxide dismutase
## 7                                                                                                                                                                                                                                                                                                                                                                                                                    urease accessory protein
## 8                                                                                                                                                                                                                                                                                                                                                                                                                             urease activity
## 9                                                                                                                                                                                                                                                                                                                                                                                                                  Superoxide dismutase Cu-Zn
## 10                                                                                                                                                                                                                                                                                                                                                                                        Respiratory-chain NADH dehydrogenase, 49 Kd subunit
## 11                                                                                                                                                                                                                                                                                                                                                                                                                                NAD binding
## 12                                                                                                                                                                                                                                                                                                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 13                                                                                                                                                                                                                                                                                                                                                                                                                     (urease) gamma subunit
## 14                                                                                                                                                                                                                                                                                                                                                                                                         COG2032 Cu Zn superoxide dismutase
## 15                                                                                                                                                                                                                                                                                                                                                                                                                   Urease accessory protein
## 16                                                                                                                                                                                                                                                                                                                                                                                    PFAM NADH ubiquinone oxidoreductase-like, 20kDa subunit
## 17                                                                                                                                                                                                                                                                                                                                                                                       COG0852 NADH ubiquinone oxidoreductase 27 kD subunit
## 18                                                                                                                                                                                                                                                                                                                                                                                                                                       UreF
## 19                                                                                                                                                                                                                                                                                                                                                                                                                      nickel cation binding
## 20                                                                                                                                                                                                                                                                                                                                  COG0378 Ni2 -binding GTPase involved in regulation of expression and maturation of urease and hydrogenase
## 21 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be a menaquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 22                                                                                                                                                                                                                                                                                                                                                                                                 belongs to the urease gamma subunit family
## 23                                                                                                                                                                                                                                                                                                                                                                                           UreE urease accessory protein, N-terminal domain
## 24                                                                                                                                                                                                                                                                                                                                                                                                regulation of superoxide dismutase activity
## 25                                                                                                                                                                                                                                                                                                                                                                                                                            DNA glycosylase
## 26                                                                                                                                                                                                                                                                                                                                                                                                               enzyme active site formation
##           R2adj          Coef       pvallm   corspear pvalspear pvallm.adj
## 1   0.338368495  4.044571e-05 0.0008673153  0.7498872     0.000 0.03382530
## 2   0.196218620  7.468500e-05 0.0119621763  0.6565720     0.000 0.14938730
## 3   0.369901707  1.953177e-05 0.0004554090  0.6541244     0.006 0.03382530
## 4   0.157527653  2.990610e-06 0.0230666265  0.6034780     0.005 0.14993307
## 5   0.185979737  1.483644e-07 0.0142571652  0.5363009     0.009 0.14938730
## 6   0.076489185  3.602725e-06 0.0879427789  0.5068467     0.011 0.34297684
## 7   0.113849882  1.292431e-05 0.0475992782  0.4130346     0.042 0.24355801
## 8   0.098908907  6.587840e-06 0.0608471438  0.4105870     0.041 0.24979354
## 9   0.187894257 -8.306746e-08 0.0137982937 -0.3154604     0.034 0.14938730
## 10 -0.008115644 -1.564331e-07 0.3823676781 -0.3220645     0.047 0.74910770
## 11  0.148026538 -2.866255e-05 0.0270365411 -0.3332836     0.018 0.15986740
## 12  0.050253761 -9.979725e-05 0.1357975628 -0.3423598     0.028 0.39526798
## 13  0.058566202 -9.321988e-05 0.1182435594 -0.3596748     0.015 0.38592041
## 14  0.045538208 -5.534945e-07 0.1469586080 -0.3977876     0.025 0.39526798
## 15 -0.026840801 -6.776739e-06 0.5764249665 -0.3981037     0.011 0.93669057
## 16 -0.038289592 -2.414750e-06 0.8408271245 -0.4069322     0.046 0.97661713
## 17  0.164486367 -5.692661e-06 0.0205226755 -0.4135931     0.023 0.14993307
## 18  0.069699954 -8.357698e-05 0.0983516351 -0.4421000     0.027 0.36530607
## 19  0.144454135 -4.852894e-05 0.0286941480 -0.4445476     0.001 0.15986740
## 20  0.001934573 -2.346633e-07 0.3152311006 -0.4504207     0.009 0.70251502
## 21  0.048302938 -4.565129e-06 0.1403018080 -0.4588323     0.021 0.39526798
## 22  0.178290080 -6.370265e-07 0.0162517526 -0.4619605     0.023 0.14938730
## 23  0.061575382 -9.004740e-05 0.1124880800 -0.4619868     0.004 0.38592041
## 24  0.295146639 -6.860529e-07 0.0020121365 -0.4748136     0.024 0.05231555
## 25  0.058311413 -2.194015e-05 0.1187447403 -0.4995596     0.013 0.38592041
## 26  0.239868352 -3.010220e-05 0.0055649296 -0.6227064     0.002 0.10851613
##    pvalspear.adj
## 1     0.00000000
## 2     0.00000000
## 3     0.06685714
## 4     0.06500000
## 5     0.07800000
## 6     0.07800000
## 7     0.13650000
## 8     0.13650000
## 9     0.12054545
## 10    0.14100000
## 11    0.10028571
## 12    0.10400000
## 13    0.09000000
## 14    0.10263158
## 15    0.07800000
## 16    0.14100000
## 17    0.10263158
## 18    0.10400000
## 19    0.02600000
## 20    0.07800000
## 21    0.10263158
## 22    0.10263158
## 23    0.06240000
## 24    0.10263158
## 25    0.08450000
## 26    0.03900000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # 6 have coeff >0 and 3 passes the adjustment
##                                        EggNOG_ID      R2adj         Coef
## 1                         uracil-DNA glycosylase 0.33836849 4.044571e-05
## 2                           Superoxide dismutase 0.19621862 7.468500e-05
## 3   COG2370 Hydrogenase urease accessory protein 0.36990171 1.953177e-05
## 4 superoxide dismutase copper chaperone activity 0.15752765 2.990610e-06
## 5        g t u mismatch-specific dna glycosylase 0.18597974 1.483644e-07
## 6      Copper chaperone for superoxide dismutase 0.07648919 3.602725e-06
## 7                       urease accessory protein 0.11384988 1.292431e-05
## 8                                urease activity 0.09890891 6.587840e-06
##         pvallm  corspear pvalspear pvallm.adj pvalspear.adj
## 1 0.0008673153 0.7498872     0.000  0.0338253    0.00000000
## 2 0.0119621763 0.6565720     0.000  0.1493873    0.00000000
## 3 0.0004554090 0.6541244     0.006  0.0338253    0.06685714
## 4 0.0230666265 0.6034780     0.005  0.1499331    0.06500000
## 5 0.0142571652 0.5363009     0.009  0.1493873    0.07800000
## 6 0.0879427789 0.5068467     0.011  0.3429768    0.07800000
## 7 0.0475992782 0.4130346     0.042  0.2435580    0.13650000
## 8 0.0608471438 0.4105870     0.041  0.2497935    0.13650000
#Superoxide dismutase
#uracil-DNA glycosylase (narrow og desc = UreE urease accessory protein)
#COG2370 Hydrogenase urease accessory protein
EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"]
##  [1] 3.520051e-06 2.258165e-05 3.537414e-06 2.414043e-05 5.562218e-06
##  [6] 2.442940e-05 3.706262e-06 1.572073e-05 9.762735e-07 3.645000e-07
## [11] 3.371216e-06 5.168001e-06 5.860243e-06 5.161933e-06 1.323165e-07
## [16] 1.717060e-07 2.619178e-06 2.616938e-06 6.378028e-06 6.264758e-06
## [21] 6.266489e-06 5.205923e-07 3.767419e-06 3.307610e-06 2.041500e-05
## [26] 1.762316e-05 1.778057e-05
EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"]
##  [1] 2.600635e-06 9.551650e-06 1.267744e-06 9.021162e-06 1.331799e-06
##  [6] 1.167964e-05 1.174261e-06 2.273760e-06 1.722836e-07 0.000000e+00
## [11] 5.805984e-06 4.772365e-06 8.080032e-06 5.807175e-06 1.323165e-07
## [16] 1.287795e-07 4.622078e-07 1.327433e-06 7.395534e-06 8.015778e-06
## [21] 7.465934e-06 6.941231e-08 1.753155e-06 1.699323e-06 8.540969e-06
## [26] 7.907082e-06 6.828448e-06
# 2 have adj p-values<0.01
ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
  theme_bw() +
  labs(y="Superoxide dismutase (EggNOG best OG, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
  theme_bw() +
  labs(y="COG2370 Hydrogenase urease accessory protein (EggNOG best OG description)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_FL_nomertz[,"uracil-DNA glycosylase"],x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL_nomertz$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="uracil-DNA glycosylase (EggNOG best OG description)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# ATT size fraction

Results_lm_ATT=data.frame(EggNOG_ID=c("EggNOGID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
EggNOG_matrix_ATT=EggNOG_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(EggNOG_matrix_ATT)),]

for (i in c(1:ncol(EggNOG_matrix_ATT))) {
  res=lm(EggNOG_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(EggNOG_matrix_ATT)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(EggNOG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(EggNOG_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$EggNOG_ID=as.factor(Results_lm_ATT$EggNOG_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
##                                                                                                                                                                                                                                                                                                                                                                                          EggNOG_ID 
##  (urease) gamma subunit                                                                                                                                                                                                                                                                                                                                                                       : 1  
##  [NiFe]-hydrogenase assembly, chaperone, HybE                                                                                                                                                                                                                                                                                                                                                 : 1  
##  age-dependent response to reactive oxygen species                                                                                                                                                                                                                                                                                                                                            : 1  
##  Along with HypE, it catalyzes the synthesis of the CN ligands of the active site iron of NiFe -hydrogenases using carbamoylphosphate as a substrate. It functions as a carbamoyl transferase using carbamoylphosphate as a substrate and transferring the carboxamido moiety in an ATP-dependent reaction to the thiolate of the C-terminal cysteine of HypE yielding a protein-S-carboxamide: 1  
##  Belongs to the NiFe NiFeSe hydrogenase large subunit family                                                                                                                                                                                                                                                                                                                                  : 1  
##  Belongs to the urease                                                                                                                                                                                                                                                                                                                                                                        : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                      :55  
##      R2adj                Coef                pvallm         
##  Min.   :-0.062493   Min.   :-1.170e-05   Min.   :0.0005066  
##  1st Qu.:-0.038402   1st Qu.:-1.024e-06   1st Qu.:0.1771164  
##  Median :-0.007245   Median :-8.870e-08   Median :0.3627533  
##  Mean   : 0.019979   Mean   : 2.372e-06   Mean   :0.4090096  
##  3rd Qu.: 0.055220   3rd Qu.: 9.330e-07   3rd Qu.:0.5508451  
##  Max.   : 0.511998   Max.   : 3.193e-05   Max.   :0.9920935  
##                                                              
##     corspear          pvalspear        pvallm.adj     pvalspear.adj   
##  Min.   :-0.54490   Min.   :0.0000   Min.   :0.0309   Min.   :0.0000  
##  1st Qu.:-0.29720   1st Qu.:0.0990   1st Qu.:0.6582   1st Qu.:0.3538  
##  Median :-0.11693   Median :0.3300   Median :0.6888   Median :0.6222  
##  Mean   :-0.07477   Mean   :0.3797   Mean   :0.7156   Mean   :0.5898  
##  3rd Qu.: 0.08273   3rd Qu.:0.6460   3rd Qu.:0.7203   3rd Qu.:0.8567  
##  Max.   : 0.71718   Max.   :0.9870   Max.   :0.9921   Max.   :0.9870  
## 
# Doesn't look as promising
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05),] %>% arrange(desc(corspear))
##                                                                                                                                                                                                                                                                                                                                                                                                                                 EggNOG_ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                           UreD urease accessory protein
## 2                                                                                                                                                                                                                                                                                         Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 3                                                                                                                                                                                                                                                                                                                                                                                                          COG1573 Uracil-DNA glycosylase
## 4 NDH-1 shuttles electrons from NADH, via FMN and iron- sulfur (Fe-S) centers, to quinones in the respiratory chain. The immediate electron acceptor for the enzyme in this species is believed to be ubiquinone. Couples the redox reaction to proton translocation (for every two electrons transferred, four hydrogen ions are translocated across the cytoplasmic membrane), and thus conserves the redox energy in a proton gradient
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                    UreF
## 6                                                                                                                                                                                                                                                                                                                                         radicals which are normally produced within the cells and which are toxic to biological systems
## 7                                                                                                                                                                                                                                                                                                                                                                                        UreE urease accessory protein, N-terminal domain
## 8                                                                                                                                                                                                                                                                                                                                                                                                                             NAD binding
##          R2adj          Coef       pvallm   corspear pvalspear pvallm.adj
## 1  0.511998250  2.432805e-05 0.0005066074  0.7171848     0.007 0.03090305
## 2  0.085358581  1.678385e-05 0.1273282354  0.5327827     0.026 0.65817014
## 3  0.199355266  1.001660e-07 0.0361180764  0.3975649     0.000 0.55080066
## 4  0.009245237 -6.613806e-07 0.2977119684 -0.4054589     0.035 0.68876661
## 5  0.010834669 -1.566047e-06 0.2922290515 -0.4394834     0.015 0.68876661
## 6  0.047046889 -1.169910e-05 0.1938664332 -0.4423796     0.039 0.65817014
## 7 -0.021142608 -1.189283e-06 0.4326226215 -0.4848494     0.008 0.68876661
## 8  0.019298921 -2.570436e-06 0.2649608541 -0.5449049     0.005 0.68876661
##   pvalspear.adj
## 1     0.1220000
## 2     0.2643333
## 3     0.0000000
## 4     0.2973750
## 5     0.1830000
## 6     0.2973750
## 7     0.1220000
## 8     0.1220000
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05 & Results_lm_ATT$corspear>0),] %>% arrange(desc(corspear)) # 6 have coeff >0 and 3 passes the adjustment
##                                                                                                                                         EggNOG_ID
## 1                                                                                                                   UreD urease accessory protein
## 2 Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 3                                                                                                                  COG1573 Uracil-DNA glycosylase
##        R2adj         Coef       pvallm  corspear pvalspear pvallm.adj
## 1 0.51199825 2.432805e-05 0.0005066074 0.7171848     0.007 0.03090305
## 2 0.08535858 1.678385e-05 0.1273282354 0.5327827     0.026 0.65817014
## 3 0.19935527 1.001660e-07 0.0361180764 0.3975649     0.000 0.55080066
##   pvalspear.adj
## 1     0.1220000
## 2     0.2643333
## 3     0.0000000
ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="UreD urease accessory protein (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="UreD urease accessory protein (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_ATT[,"COG1573 Uracil-DNA glycosylase"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=EggNOG_matrix_ATT[,"COG1573 Uracil-DNA glycosylase"],x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="COG1573 Uracil-DNA glycosylase (EggNOG annotation)", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# --> Basically present in one spot only, irrelevant

# --> Clean figures with the most interesting ones
g1 <- ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"Superoxide dismutase"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=EggNOG_matrix_FL[,"Superoxide dismutase"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
  scale_color_manual(values=c("black","skyblue")) +
  theme_bw() +
  labs(y="Superoxide dismutase\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape="Mertz Glacier", col="Mertz Glacier") +
  theme(text = element_text(size = 16))
g1

g2 <- ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_FL_nomertz[,"COG2370 Hydrogenase urease accessory protein"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=EggNOG_matrix_FL[,"COG2370 Hydrogenase urease accessory protein"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
  scale_color_manual(values=c("black","skyblue")) +
  theme_bw() +
  labs(y="COG2370 Hydrogenase urease accessory protein\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier", col="Mertz Glacier") +
  theme(text = element_text(size = 16))
g2

g3 <- ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=EggNOG_matrix_ATT[,"UreD urease accessory protein"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="UreD urease accessory protein\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
  theme(text = element_text(size = 16))
g3

g4 <- ggplot() + 
  geom_smooth(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=EggNOG_matrix_ATT[,"Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="Facilitates the functional incorporation\nof the urease nickel metallocenter\n(EggNOG best OG description, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
  theme(text = element_text(size = 16))
g4

6 PFAM annotation level

PFAM_IDs=unique(AnnotFull$PFAMs)[!is.na(unique(AnnotFull$PFAMs))]

#Initiate matrix :
PFAM_matrix_FL=GeneMat_Nickel_FL[,1:length(PFAM_IDs)]
#Fill matrix
for (i in c(1:length(PFAM_IDs))) {
  PFAM_matrix_FL[,i]=rowSums(GeneMat_Nickel_FL[,which(names(GeneMat_Nickel_FL) %in% AnnotFull[which(AnnotFull$PFAMs==PFAM_IDs[i]),"Gene_ID"]),drop=F])
  names(PFAM_matrix_FL)[i]=PFAM_IDs[i]
}
PFAM_matrix_FL=PFAM_matrix_FL[,-which(colSums(PFAM_matrix_FL)==0)]
#

#Initiate matrix :
PFAM_matrix_ATT=GeneMat_Nickel_ATT[,1:length(PFAM_IDs)]
#Fill matrix
for (i in c(1:length(PFAM_IDs))) {
  PFAM_matrix_ATT[,i]=rowSums(GeneMat_Nickel_ATT[,which(names(GeneMat_Nickel_ATT) %in% AnnotFull[which(AnnotFull$PFAMs==PFAM_IDs[i]),"Gene_ID"]), drop=F])
  names(PFAM_matrix_ATT)[i]=PFAM_IDs[i]
}
PFAM_matrix_ATT=PFAM_matrix_ATT[,-which(colSums(PFAM_matrix_ATT)==0)]

# FL size fraction

Results_lm_FL=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
PFAM_matrix_FL=PFAM_matrix_FL[match(meta_nickel_FL$ACE_seq_name,row.names(PFAM_matrix_FL)),]

for (i in c(1:ncol(PFAM_matrix_FL))) {
  res=lm(PFAM_matrix_FL[,i]~meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(PFAM_matrix_FL)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(PFAM_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(PFAM_matrix_FL[,i],meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$PFAM_ID=as.factor(Results_lm_FL$PFAM_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
##                                                                                                                                                                                                                                                                                                                                                                                                     PFAM_ID  
##  AAA_11,AAA_12,ABC_membrane,ABC_tran,ACC_central,Biotin_carb_C,Biotin_carb_N,Biotin_lipoyl,Biotin_lipoyl_2,CPSase_L_D2,Carboxyl_trans,Complex1_49kDa,DUF3347,DUF4347,DUF559,HlyD_3,HlyD_D23,NiFeSe_Hases,Oxidored_q6,Poly_export,RNA_pol_Rpb1_5,SLBB,YXWGXW                                                                                                                                             : 1  
##  Abhydrolase_2,Esterase_phd                                                                                                                                                                                                                                                                                                                                                                             : 1  
##  Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1  
##  Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                     : 1  
##  Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                            : 1  
##  Acylphosphatase,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                   : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                                :62  
##      R2adj               Coef                pvallm             corspear      
##  Min.   :-0.03571   Min.   :-1.018e-04   Min.   :0.0000656   Min.   :-0.6365  
##  1st Qu.:-0.03311   1st Qu.:-1.980e-06   1st Qu.:0.2360188   1st Qu.:-0.2105  
##  Median :-0.01381   Median :-3.108e-08   Median :0.4432562   Median :-0.0942  
##  Mean   : 0.02077   Mean   :-4.017e-06   Mean   :0.4807697   Mean   :-0.0588  
##  3rd Qu.: 0.01583   3rd Qu.: 2.704e-08   3rd Qu.:0.7924878   3rd Qu.: 0.1075  
##  Max.   : 0.41941   Max.   : 5.569e-05   Max.   :0.9944291   Max.   : 0.4498  
##                                                                               
##    pvalspear        pvallm.adj       pvalspear.adj   
##  Min.   :0.0000   Min.   :0.004461   Min.   :0.0000  
##  1st Qu.:0.1010   1st Qu.:0.806988   1st Qu.:0.3686  
##  Median :0.3885   Median :0.873662   Median :0.7532  
##  Mean   :0.3772   Mean   :0.773306   Mean   :0.5999  
##  3rd Qu.:0.6000   3rd Qu.:0.969673   3rd Qu.:0.7564  
##  Max.   :0.9880   Max.   :0.994429   Max.   :0.9880  
## 
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #A few significant according to spearman pre-correction
##                                                  PFAM_ID        R2adj
## 1                           HMA,PB1,Sod_Cu,TPR_1,XPG_I_2  0.052903447
## 2                                                 Sod_Ni  0.066270307
## 3 Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases,Oxidored_q6 -0.006919077
## 4                                           Urease_gamma  0.093195141
## 5                            Complex1_49kDa,NiFeSe_Hases  0.002600695
## 6             Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases  0.273567894
## 7                                          NiFe-hyd_HybE -0.023604992
## 8                          Glycos_trans_3N,UreE_C,UreE_N  0.253073478
## 9     Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma  0.419412752
##            Coef       pvallm   corspear pvalspear  pvallm.adj pvalspear.adj
## 1  1.721716e-06 1.167422e-01  0.4497786     0.022 0.610651531     0.1870000
## 2  3.981461e-05 9.128547e-02  0.4378836     0.022 0.517284303     0.1870000
## 3 -2.147331e-07 3.785046e-01 -0.3206927     0.048 0.830268202     0.3609231
## 4 -1.017815e-04 5.584680e-02 -0.3489698     0.020 0.379758258     0.1870000
## 5 -1.041466e-05 3.085543e-01 -0.4657873     0.004 0.806988141     0.0680000
## 6 -3.959465e-05 1.782399e-03 -0.5067443     0.001 0.050963816     0.0340000
## 7 -1.315814e-07 5.695261e-01 -0.5076382     0.007 0.926780457     0.0952000
## 8 -9.727454e-06 2.707013e-03 -0.5712114     0.003 0.050963816     0.0680000
## 9 -1.485475e-05 6.560013e-05 -0.6365281     0.000 0.004460809     0.0000000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # Only 2 have coeff >0 and none passes the adjustment
##                        PFAM_ID      R2adj         Coef     pvallm  corspear
## 1 HMA,PB1,Sod_Cu,TPR_1,XPG_I_2 0.05290345 1.721716e-06 0.11674220 0.4497786
## 2                       Sod_Ni 0.06627031 3.981461e-05 0.09128547 0.4378836
##   pvalspear pvallm.adj pvalspear.adj
## 1     0.022  0.6106515         0.187
## 2     0.022  0.5172843         0.187
ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="Sod_Ni", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# Seems nice, maybe significant without Mertz

ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_FL[,"HMA,PB1,Sod_Cu,TPR_1,XPG_I_2"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=PFAM_matrix_FL[,"HMA,PB1,Sod_Cu,TPR_1,XPG_I_2"]*100,x=meta_nickel_FL$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_FL$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="HMA,PB1,Sod_Cu,TPR_1,XPG_I_2", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# Not particularly high in station 8/11

# Again, mertz seems to disturb the relationship. Let's try without it :
Results_lm_FL=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

PFAM_matrix_FL_nomertz=PFAM_matrix_FL[match(meta_nickel_FL_nomertz$ACE_seq_name,row.names(PFAM_matrix_FL)),]

for (i in c(1:ncol(PFAM_matrix_FL_nomertz))) {
  res=lm(PFAM_matrix_FL_nomertz[,i]~meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(PFAM_matrix_FL_nomertz)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(PFAM_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(PFAM_matrix_FL_nomertz[,i],meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_FL[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_FL$PFAM_ID=as.factor(Results_lm_FL$PFAM_ID)
Results_lm_FL$R2adj=as.numeric(Results_lm_FL$R2adj)
Results_lm_FL$Coef=as.numeric(Results_lm_FL$Coef)
Results_lm_FL$pvallm=as.numeric(Results_lm_FL$pvallm)
Results_lm_FL$pvallm.adj=p.adjust(Results_lm_FL$pvallm, method="BH")
Results_lm_FL$corspear=as.numeric(Results_lm_FL$corspear)
Results_lm_FL$pvalspear=as.numeric(Results_lm_FL$pvalspear)
Results_lm_FL$pvalspear.adj=p.adjust(Results_lm_FL$pvalspear, method="BH")
summary(Results_lm_FL)
##                                                                                                                                                                                                                                                                                                                                                                                                     PFAM_ID  
##  AAA_11,AAA_12,ABC_membrane,ABC_tran,ACC_central,Biotin_carb_C,Biotin_carb_N,Biotin_lipoyl,Biotin_lipoyl_2,CPSase_L_D2,Carboxyl_trans,Complex1_49kDa,DUF3347,DUF4347,DUF559,HlyD_3,HlyD_D23,NiFeSe_Hases,Oxidored_q6,Poly_export,RNA_pol_Rpb1_5,SLBB,YXWGXW                                                                                                                                             : 1  
##  Abhydrolase_2,Esterase_phd                                                                                                                                                                                                                                                                                                                                                                             : 1  
##  Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1  
##  Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                     : 1  
##  Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                            : 1  
##  Acylphosphatase,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                   : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                                :62  
##      R2adj               Coef                pvallm            corspear       
##  Min.   :-0.03995   Min.   :-1.282e-04   Min.   :0.000219   Min.   :-0.57852  
##  1st Qu.:-0.03633   1st Qu.:-1.191e-06   1st Qu.:0.147754   1st Qu.:-0.24457  
##  Median :-0.02600   Median :-1.265e-08   Median :0.564541   Median :-0.05276  
##  Mean   : 0.02738   Mean   :-8.785e-06   Mean   :0.482931   Mean   :-0.01712  
##  3rd Qu.: 0.04522   3rd Qu.: 1.165e-07   3rd Qu.:0.768658   3rd Qu.: 0.12981  
##  Max.   : 0.40412   Max.   : 8.104e-05   Max.   :0.972520   Max.   : 0.72541  
##                                                                               
##    pvalspear        pvallm.adj      pvalspear.adj   
##  Min.   :0.0000   Min.   :0.01489   Min.   :0.0000  
##  1st Qu.:0.0640   1st Qu.:0.56254   1st Qu.:0.2447  
##  Median :0.2455   Median :0.95037   Median :0.4833  
##  Mean   :0.3581   Mean   :0.74105   Mean   :0.5199  
##  3rd Qu.:0.6020   3rd Qu.:0.95037   3rd Qu.:0.7951  
##  Max.   :0.9970   Max.   :0.97252   Max.   :0.9970  
## 
# Some spearman might be worth investigating
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05),] %>% arrange(desc(corspear)) #14 significant according to spearman pre-correction, highest coeff = LysE,UDG and SOD_Ni
##                                                                                                                                                                                                                                                                                                                                                                                                    PFAM_ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                 LysE,UDG
## 2                                                                                                                                                                                                                                                                                                                                                                                                   Sod_Ni
## 3                                                                                                                                                                                                                                                                                                                                                                             HMA,PB1,Sod_Cu,TPR_1,XPG_I_2
## 4                                                                                                                                                                                                                                                                                                                                                                                                HupE_UreJ
## 5                                                                                                                                                                                                                                                                                                                                                                                                UreD,UreF
## 6                                                                                                                                                                                                                                                                                                                                                                                               HMA,Sod_Cu
## 7                                                                                                                                                                                                                                                                                                                           Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 8  Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding
## 9                                                                                                                                                                                                                                                                                                                                                                                            UreE_C,UreE_N
## 10                                                                                                                                                                                                                                                                                                                                                                                       Sod_Fe_C,Sod_Fe_N
## 11                                                                                                                                                                                                                                                                                                                                                                                            Urease_gamma
## 12                                                                                                                                                                                                                                                                                                                                                              Complex1_30kDa,Complex1_49kDa,NiFeSe_Hases
## 13                                                                                                                                                                                                                                                                                                                                                                             Complex1_49kDa,NiFeSe_Hases
## 14                                                                                                                                                                                                                                                                                                                                                                                           NiFe-hyd_HybE
## 15                                                                                                                                                                                                                                                                                                                                                                           Glycos_trans_3N,UreE_C,UreE_N
## 16                                                                                                                                                                                                                                                                                                                                                      Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma
##           R2adj          Coef       pvallm   corspear pvalspear pvallm.adj
## 1   0.331866595  3.962379e-05 0.0009872402  0.7254111     0.002 0.02981513
## 2   0.317265809  8.103740e-05 0.0013153735  0.6935922     0.000 0.02981513
## 3   0.151121519  2.938053e-06 0.0256758123  0.6319618     0.001 0.19309924
## 4   0.145079670  3.434406e-05 0.0283969464  0.5097153     0.013 0.19309924
## 5   0.111128767  7.398130e-06 0.0497784818  0.5055999     0.010 0.30772152
## 6   0.078478490  3.655282e-06 0.0851109119  0.4807996     0.012 0.41953053
## 7   0.098908907  6.587840e-06 0.0608471438  0.4105870     0.034 0.34480048
## 8   0.019805370  8.241820e-08 0.2282943965  0.3643920     0.040 0.73923900
## 9   0.046628547 -1.063562e-04 0.1442940943 -0.3136003     0.049 0.56253836
## 10  0.037648507 -1.045910e-04 0.1678821425 -0.3224729     0.032 0.60084135
## 11  0.077583070 -1.135822e-04 0.0863739331 -0.3854989     0.018 0.41953053
## 12  0.209854816 -4.026930e-05 0.0094478715 -0.3965132     0.021 0.09177932
## 13 -0.008788139 -1.100548e-05 0.3875126796 -0.4149795     0.021 0.87836207
## 14 -0.032870989 -1.188045e-07 0.6813962508 -0.4866966     0.018 0.95036552
## 15  0.248018950 -1.128776e-05 0.0048073750 -0.5446816     0.009 0.06538030
## 16  0.404117117 -1.660958e-05 0.0002190093 -0.5785237     0.007 0.01489263
##    pvalspear.adj
## 1     0.04533333
## 2     0.00000000
## 3     0.03400000
## 4     0.11050000
## 5     0.11050000
## 6     0.11050000
## 7     0.16514286
## 8     0.18133333
## 9     0.20800000
## 10    0.16514286
## 11    0.11900000
## 12    0.11900000
## 13    0.11900000
## 14    0.11900000
## 15    0.11050000
## 16    0.11050000
Results_lm_FL[which(Results_lm_FL$pvalspear<0.05 & Results_lm_FL$corspear>0),] %>% arrange(desc(corspear)) # 7 have coeff >0 and 3 passes the adjustment
##                                                                                                                                                                                                                                                                                                                                                                                                   PFAM_ID
## 1                                                                                                                                                                                                                                                                                                                                                                                                LysE,UDG
## 2                                                                                                                                                                                                                                                                                                                                                                                                  Sod_Ni
## 3                                                                                                                                                                                                                                                                                                                                                                            HMA,PB1,Sod_Cu,TPR_1,XPG_I_2
## 4                                                                                                                                                                                                                                                                                                                                                                                               HupE_UreJ
## 5                                                                                                                                                                                                                                                                                                                                                                                               UreD,UreF
## 6                                                                                                                                                                                                                                                                                                                                                                                              HMA,Sod_Cu
## 7                                                                                                                                                                                                                                                                                                                          Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 8 Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding
##        R2adj         Coef       pvallm  corspear pvalspear pvallm.adj
## 1 0.33186660 3.962379e-05 0.0009872402 0.7254111     0.002 0.02981513
## 2 0.31726581 8.103740e-05 0.0013153735 0.6935922     0.000 0.02981513
## 3 0.15112152 2.938053e-06 0.0256758123 0.6319618     0.001 0.19309924
## 4 0.14507967 3.434406e-05 0.0283969464 0.5097153     0.013 0.19309924
## 5 0.11112877 7.398130e-06 0.0497784818 0.5055999     0.010 0.30772152
## 6 0.07847849 3.655282e-06 0.0851109119 0.4807996     0.012 0.41953053
## 7 0.09890891 6.587840e-06 0.0608471438 0.4105870     0.034 0.34480048
## 8 0.01980537 8.241820e-08 0.2282943965 0.3643920     0.040 0.73923900
##   pvalspear.adj
## 1    0.04533333
## 2    0.00000000
## 3    0.03400000
## 4    0.11050000
## 5    0.11050000
## 6    0.11050000
## 7    0.16514286
## 8    0.18133333
ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
  theme_bw() +
  labs(y="Sod_Ni", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"LysE,UDG"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=PFAM_matrix_FL_nomertz[,"LysE,UDG"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE), size=3) +
  theme_bw() +
  labs(y="LysE,UDG", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# ATT size fraction

Results_lm_ATT=data.frame(PFAM_ID=c("PFAM_ID"),R2adj=c(0.0),Coef=c(0.0), pvallm=c(0.0), corspear=c(0.0),pvalspear=c(0.0))

# We need to match orders of row names before computing any stats :
PFAM_matrix_ATT=PFAM_matrix_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(PFAM_matrix_ATT)),]

for (i in c(1:ncol(PFAM_matrix_ATT))) {
  res=lm(PFAM_matrix_ATT[,i]~meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE)
  agc_id=names(PFAM_matrix_ATT)[i]
  R2=summary(res)$adj.r.squared
  pvallm=lmp(res)
  slope=res$coefficients[2]
  corspear=cor(PFAM_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "spearman")
  pvalspear=perm_test(PFAM_matrix_ATT[,i],meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE,method = "Spearman", alternative = "two.sided")$p.value
  Results_lm_ATT[i,]=c(agc_id,R2,slope,pvallm,corspear,pvalspear)
}
Results_lm_ATT$PFAM_ID=as.factor(Results_lm_ATT$PFAM_ID)
Results_lm_ATT$R2adj=as.numeric(Results_lm_ATT$R2adj)
Results_lm_ATT$Coef=as.numeric(Results_lm_ATT$Coef)
Results_lm_ATT$pvallm=as.numeric(Results_lm_ATT$pvallm)
Results_lm_ATT$pvallm.adj=p.adjust(Results_lm_ATT$pvallm, method="BH")
Results_lm_ATT$corspear=as.numeric(Results_lm_ATT$corspear)
Results_lm_ATT$pvalspear=as.numeric(Results_lm_ATT$pvalspear)
Results_lm_ATT$pvalspear.adj=p.adjust(Results_lm_ATT$pvalspear, method="BH")
summary(Results_lm_ATT)
##                                                                                                                                                                                                                                                                                                                                                                                                     PFAM_ID  
##  Acetyltransf_1,Acetyltransf_7,DAO,DUF1858,DUF1990,DoxX_3,EF-hand_1,EF-hand_5,EF-hand_7,Epimerase,FAD_binding_6,Fer2,Fer2_4,Fer2_BFD,Fer4_11,Ferric_reduct,NAD_binding_1,NAD_binding_10,NAD_binding_4,NIR_SIR,NIR_SIR_ferr,NO_synthase,NiFe_hyd_SSU_C,NifU,NifU_N,NmrA,Oxidored_q6,Pyr_redox_2,Reductase_C,Rieske_2,SUKH_5,Semialdhyde_dh,SnoaL_4,SusD-like_3,SusD_RagB,TP_methylase,TrkA_N,cNMP_binding: 1  
##  Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                     : 1  
##  Acylphosphatase,FGGY_C,Peptidase_M22,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                            : 1  
##  Acylphosphatase,Sua5_yciO_yrdC,zf-HYPF                                                                                                                                                                                                                                                                                                                                                                 : 1  
##  Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma                                                                                                                                                                                                                                                                                                                         : 1  
##  Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma                                                                                                                                                                                                                                                                                                                                                     : 1  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                                :51  
##      R2adj               Coef                pvallm             corspear      
##  Min.   :-0.06249   Min.   :-1.864e-05   Min.   :0.0003622   Min.   :-0.5449  
##  1st Qu.:-0.03226   1st Qu.:-8.858e-07   1st Qu.:0.1535460   1st Qu.:-0.2940  
##  Median :-0.00151   Median :-1.757e-07   Median :0.3382864   Median :-0.1169  
##  Mean   : 0.03294   Mean   : 2.645e-06   Mean   :0.3688405   Mean   :-0.0724  
##  3rd Qu.: 0.06822   3rd Qu.: 1.429e-06   3rd Qu.:0.5033539   3rd Qu.: 0.1398  
##  Max.   : 0.53124   Max.   : 3.405e-05   Max.   :0.9929004   Max.   : 0.7180  
##                                                                               
##    pvalspear        pvallm.adj      pvalspear.adj   
##  Min.   :0.0000   Min.   :0.02065   Min.   :0.0000  
##  1st Qu.:0.1050   1st Qu.:0.53334   1st Qu.:0.3856  
##  Median :0.3160   Median :0.64327   Median :0.5928  
##  Mean   :0.3667   Mean   :0.62430   Mean   :0.5769  
##  3rd Qu.:0.5850   3rd Qu.:0.66090   3rd Qu.:0.7755  
##  Max.   :0.9320   Max.   :0.99290   Max.   :0.9320  
## 
# Some spearman might be worth investigating
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05),] %>% arrange(desc(corspear)) #A few significant according to spearman pre-correction
##                                                                  PFAM_ID
## 1                                                              UreD,UreF
## 2                     Acylphosphatase,FGGY_C,HycI,Sua5_yciO_yrdC,zf-HYPF
## 3                     Amidohydro_1,Urease_alpha,Urease_beta,Urease_gamma
## 4                                                             HMA,Sod_Cu
## 5 Complex1_30kDa,Complex1_49kDa,Methyltransf_28,NiFeSe_Hases,Oxidored_q6
##        R2adj          Coef       pvallm   corspear pvalspear pvallm.adj
## 1 0.53124021  2.306228e-05 0.0003622142  0.7179752     0.006 0.02064621
## 2 0.06822015 -5.821426e-08 0.1535460437 -0.3975649     0.000 0.53334166
## 3 0.09526412 -2.677682e-07 0.1142963274 -0.4879092     0.023 0.53334166
## 4 0.16172942 -8.715604e-07 0.0551235463 -0.4947732     0.017 0.49741488
## 5 0.01849502 -2.419457e-06 0.2674190909 -0.5449049     0.003 0.64327267
##   pvalspear.adj
## 1       0.11400
## 2       0.00000
## 3       0.26220
## 4       0.24225
## 5       0.08550
Results_lm_ATT[which(Results_lm_ATT$pvalspear<0.05 & Results_lm_ATT$corspear>0),] %>% arrange(desc(corspear)) # Only 1 have coeff >0 and none passes the adjustment
##     PFAM_ID     R2adj         Coef       pvallm  corspear pvalspear pvallm.adj
## 1 UreD,UreF 0.5312402 2.306228e-05 0.0003622142 0.7179752     0.006 0.02064621
##   pvalspear.adj
## 1         0.114
ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2) +
  geom_point(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="UreD,UreF", x=expression(delta^60 * Ni ~ "(‰)")) +
  theme(text = element_text(size = 16))

# The most interesting can go into figures :

g5 <- ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_FL_nomertz[,"Sod_Ni"]*100,x=meta_nickel_FL_nomertz$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=PFAM_matrix_FL[,"Sod_Ni"]*100,x=meta_nickel_FL[, "Ni_60_58_D_DELTA_BOTTLE"], shape=meta_nickel_FL[, "MertzGlacier"], col=meta_nickel_FL[, "MertzGlacier"]), size=3) +
  scale_color_manual(values=c("black","skyblue")) +
  theme_bw() +
  labs(y="Sod_Ni\n(PFAM annotation, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier", col="Mertz Glacier") +
  theme(text = element_text(size = 16))
g5

g6 <- ggplot() + 
  geom_smooth(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE),method="lm",alpha=0.2, col="grey20") +
  geom_point(aes(y=PFAM_matrix_ATT[,"UreD,UreF"]*100,x=meta_nickel_ATT$Ni_60_58_D_DELTA_BOTTLE, shape=meta_nickel_ATT$MertzGlacier), size=3) +
  theme_bw() +
  labs(y="UreD,UreF\n(PFAM annotation, % of coverage)", x=expression(delta^60 * Ni ~ "(‰)"), shape= "Mertz Glacier") +
  theme(text = element_text(size = 16))
g6

7 AGNOSTOS cluster level

# Load AGC abundances
GeneMat = read.table("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/GM_AGN_TMAX60GQ_transposed_Metallo-without-nickelgenes_rCLR-transformed.tsv", sep="\t")

# If using AGC table non rCLR transformed :
#AGCList_withtoutgeneralist=read.table("/Users/emifaure/Documents/ACE/NolwennCollab/AGC_Metallo_WithoutNickelGenes.tsv", sep="\t", header=F)
#GeneMat=GeneMat[,which(names(GeneMat) %in% AGCList_withtoutgeneralist$V1)]

GeneMat_Nickel = GeneMat[row.names(GeneMat) %in% meta_nickel$ACE_seq_name,]

GeneMat_Nickel_FL = GeneMat[row.names(GeneMat) %in% meta_nickel_FL$ACE_seq_name,]
GeneMat_Nickel_FL=GeneMat_Nickel_FL[match(meta_nickel_FL$ACE_seq_name,row.names(GeneMat_Nickel_FL)),]
#Remove columns of only 0
GeneMat_Nickel_FL = GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL)==0)]

GeneMat_Nickel_ATT = GeneMat[row.names(GeneMat) %in% meta_nickel_ATT$ACE_seq_name,]
GeneMat_Nickel_ATT=GeneMat_Nickel_ATT[match(meta_nickel_ATT$ACE_seq_name,row.names(GeneMat_Nickel_ATT)),]
#Remove columns of only 0
GeneMat_Nickel_ATT = GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT)==0)]

# We want to associate taxonomy to each AGC, load nickel-related contigs taxo :
Taxopos=read.table('/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/Taxonomy_Uniref90_Contigs_metallo_WithoutNickelGenes_WithKrakenLineage.tsv', sep="\t", header=F,fill=T, quote = '', na.strings="")
# Information in column 6 is redundant with column 5
Taxopos=Taxopos[,-6]
names(Taxopos)=c("Contig","Seed","HighestRank","HighestAnnot_uniref","Annot_uniref","Annot_gtdb")

Taxopos=separate_wider_delim(Taxopos, cols = Annot_uniref, delim = ";", names = c("Species_uniref","Genus_uniref","Family_uniref","Order_uniref",
                                                                           "Class_uniref","Phylum_uniref","Domain_uniref","Root_uniref"))
Taxopos=separate_wider_delim(Taxopos, cols = Annot_gtdb, delim = ";", names = c("Root_gtdb","Domain_gtdb","Phylum_gtdb","Class_gtdb","Order_gtdb","Family_gtdb","Genus_gtdb","Species_gtdb"), too_few = c("align_start"))

# Needs cleaning
Taxopos=select(Taxopos, -c("Seed","Root_gtdb","Root_uniref"))

Genes=AnnotFull
Genes$Contigs_ID=sub("_[^_]+$", "", Genes$Gene_ID)
Genes$Contigs_ID=gsub("ACE_", "", Genes$Contigs_ID)

Genes_Taxo_Annot=merge(Genes,Taxopos,by.x="Contigs_ID", by.y="Contig")
Genes_Taxo_Annot <- Genes_Taxo_Annot %>% replace_with_na_all(condition = ~.x == "")
Genes_Taxo_Annot <- Genes_Taxo_Annot %>% mutate_if(is.character,as.factor)
Genes_Taxo_Annot<-as.data.frame(Genes_Taxo_Annot)
summary(Genes_Taxo_Annot)
##        Contigs_ID                       Gene_ID               AGC_ID     
##  G105_260239:    9   ACE_G1_100688_153808264:    1   AGC_7792447 : 2463  
##  G142_206063:    9   ACE_G1_102611_4772586  :    1   AGC_3476410 : 1676  
##  G101_245639:    8   ACE_G1_104559_74923114 :    1   AGC_15613456: 1334  
##  G102_143898:    8   ACE_G1_108198_66157108 :    1   AGC_8803290 : 1230  
##  G103_22249 :    8   ACE_G1_110264_118743107:    1   AGC_27255931:  816  
##  G106_115049:    8   ACE_G1_112699_74923939 :    1   AGC_1382956 :  799  
##  (Other)    :29837   (Other)                :29881   (Other)     :21569  
##                            AGC_Rep         AGC_Size        AGC_Cat     
##  ACE_G36_000000069010_49679286 : 2463   Min.   :    1.0   DISC : 7648  
##  ACE_G152_000000097994_90230753: 1676   1st Qu.:   50.0   EU   :  229  
##  ACE_G174_000000033486_3784048 : 1334   Median :  267.0   GU   : 1827  
##  ACE_G86_000000123516_86804048 : 1230   Mean   :  766.8   K    :17826  
##  ACE_G91_000000108792_139750272:  816   3rd Qu.: 1230.0   KWP  : 1635  
##  ACE_G20_000000033145_127731905:  799   Max.   :14756.0   SINGL:  722  
##  (Other)                       :21569                                  
##  Singl_Cat                      CDHit_Rep            Domain     
##  EU :   67   ACE_G101_69959_35742    :  118   Eukaryote : 1337  
##  GU :  171   ACE_G103_122771_145070  :  102   Prokaryote:28550  
##  K  :  210   ACE_G6_41384_85802909   :   97                     
##  KWP:  274   ACE_G94_172308_148699120:   84                     
##  NA :29165   ACE_G162_58692_11784686 :   83                     
##              ACE_G216_112674_5147263 :   83                     
##              (Other)                 :29320                     
##       KEGG                             seed_ortholog  
##  K04564 : 5267   666509.RCA23_c18480          :  712  
##  K03188 : 2355   2903.EOD32519                :  453  
##  K03190 : 2241   643867.Ftrac_3655            :  453  
##  K03187 : 2139   1189620.AJXL01000002_gene1866:  426  
##  K03189 : 1604   1131266.ARWQ01000023_gene389 :  325  
##  (Other): 1438   1131266.ARWQ01000023_gene386 :  303  
##  NA's   :14843   (Other)                      :27215  
##                                                                                                                                     eggNOG_OGs   
##  COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria                                             : 1475  
##  COG0804@1|root,2QPKB@2759|Eukaryota                                                                                                     :  894  
##  COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria,4BQ03@82117|unclassified Alphaproteobacteria:  597  
##  COG2032@1|root,COG2032@2|Bacteria                                                                                                       :  593  
##  2A42S@1|root,30SMP@2|Bacteria                                                                                                           :  532  
##  arCOG06188@1|root,arCOG06188@2157|Archaea,41STJ@651137|Thaumarchaeota                                                                   :  522  
##  (Other)                                                                                                                                 :25274  
##                                        narr_OG_name    narr_OG_cat  
##  2TU3T@28211|Alphaproteobacteria             : 1475   O      :9291  
##  2QPKB@2759|Eukaryota                        :  894   P      :6244  
##  4BQ03@82117|unclassified Alphaproteobacteria:  597   E      :4020  
##  COG2032@2|Bacteria                          :  593   S      :3096  
##  30SMP@2|Bacteria                            :  532   C      :3051  
##  41STJ@651137|Thaumarchaeota                 :  522   KO     :2572  
##  (Other)                                     :25274   (Other):1613  
##                                                                                                                                           narr_OG_desc  
##  Destroys radicals which are normally produced within the cells and which are toxic to biological systems                                       : 5382  
##  Required for maturation of urease via the functional incorporation of the urease nickel metallocenter                                          : 4321  
##  Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG: 2561  
##  Nickel-containing superoxide dismutase                                                                                                         : 2439  
##  Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly                    : 1874  
##  superoxide dismutase activity                                                                                                                  : 1638  
##  (Other)                                                                                                                                        :11672  
##                           best_OG_name    best_OG_cat  
##  2TU3T@28211|Alphaproteobacteria: 2942   O      :9613  
##  4NGGW@976|Bacteroidetes        : 1022   P      :7591  
##  2QPKB@2759|Eukaryota           :  894   E      :4020  
##  4NDZ4@976|Bacteroidetes        :  872   S      :3093  
##  COG2032@2|Bacteria             :  593   KO     :2744  
##  1G5DA@1117|Cyanobacteria       :  571   (Other):2818  
##  (Other)                        :22993   NA's   :   8  
##                                                                                                                                           best_OG_desc  
##  Destroys radicals which are normally produced within the cells and which are toxic to biological systems                                       : 5388  
##  Required for maturation of urease via the functional incorporation of the urease nickel metallocenter                                          : 4437  
##  Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG: 2625  
##  Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly                    : 1933  
##  Nickel-containing superoxide dismutase                                                                                                         : 1593  
##  (Other)                                                                                                                                        :13903  
##  NA's                                                                                                                                           :    8  
##  Preferred_name   CAZy      
##  sodB   :3627   NA's:29887  
##  ureG   :2614               
##  ureF   :2308               
##  ureD   :2274               
##  ureE   :2009               
##  (Other):8221               
##  NA's   :8834               
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                            BiGG_Reaction  
##  iJN678.ureG                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      :  267  
##  iECH74115_1262.ECH74115_1322,iECO103_1326.ECO103_3798,iECO111_1330.ECO111_1228,iECSP_1301.ECSP_1250,iECs_1301.ECs1323,iUMNK88_1353.UMNK88_1200,iZ_1308.Z1144,iZ_1308.Z1583                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       :  179  
##  iECH74115_1262.ECH74115_1321,iECO103_1326.ECO103_3797,iECO111_1330.ECO111_1227,iECO26_1355.ECO26_1280,iECSP_1301.ECSP_1249,iECs_1301.ECs1322,iUMNK88_1353.UMNK88_1199,iZ_1308.Z1143,iZ_1308.Z1582                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                :  147  
##  iAPECO1_1312.APECO1_76,iUTI89_1310.UTI89_C1040                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   :   84  
##  iAF1260.b1656,iAPECO1_1312.APECO1_736,iB21_1397.B21_01616,iBWG_1329.BWG_1471,iE2348C_1286.E2348C_1742,iEC042_1314.EC042_1825,iEC55989_1330.EC55989_1824,iECABU_c1320.ECABU_c19090,iECBD_1354.ECBD_1987,iECB_1328.ECB_01626,iECDH10B_1368.ECDH10B_1790,iECDH1ME8569_1439.ECDH1ME8569_1601,iECD_1391.ECD_01626,iECED1_1282.ECED1_1855,iECH74115_1262.ECH74115_2368,iECIAI1_1343.ECIAI1_1708,iECO103_1326.ECO103_1797,iECO111_1330.ECO111_2126,iECO26_1355.ECO26_2385,iECOK1_1307.ECOK1_1775,iECP_1309.ECP_1601,iECS88_1305.ECS88_1705,iECSE_1348.ECSE_1780,iECSP_1301.ECSP_2222,iECUMN_1333.ECUMN_1946,iECW_1372.ECW_m1823,iECs_1301.ECs2365,iEKO11_1354.EKO11_2118,iETEC_1333.ETEC_1691,iEcDH1_1363.EcDH1_1984,iEcE24377_1341.EcE24377A_1869,iEcHS_1320.EcHS_A1735,iEcSMS35_1347.EcSMS35_1542,iEcolC_1368.EcolC_1973,iG2583_1286.G2583_2051,iJO1366.b1656,iJR904.b1656,iLF82_1304.LF82_2148,iNRG857_1313.NRG857_08300,iSBO_1134.SBO_1475,iSDY_1059.SDY_1882,iSFV_1184.SFV_1678,iSF_1195.SF1684,iSFxv_1172.SFxv_1892,iSSON_1240.SSON_1500,iS_1188.S1816,iSbBS512_1146.SbBS512_E1853,iUMN146_1321.UM146_08870,iUMNK88_1353.UMNK88_2117,iUTI89_1310.UTI89_C1847,iWFL_1372.ECW_m1823,iY75_1357.Y75_RS08680,iZ_1308.Z2678,ic_1306.c2050:   80  
##  (Other)                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          :  165  
##  NA's                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             :28965  
##                PFAMs                 FuncSimplify         HighestRank   
##  Sod_Fe_C,Sod_Fe_N: 5193   NiFe Hydrogenase: 1739   species     :12032  
##  cobW             : 3216   SOD             :11180   no rank     : 8908  
##  UreF             : 3045   Urease          :16968   phylum      : 2195  
##  UreE_C,UreE_N    : 2720                            superkingdom: 1866  
##  UreD             : 2700                            order       : 1208  
##  (Other)          :12810                            class       : 1179  
##  NA's             :  203                            (Other)     : 2499  
##              HighestAnnot_uniref                Species_uniref 
##  root                  : 6532    unknown               : 7758  
##  Bacteria              : 1458    uc_Bacteria           : 1458  
##  unclassified          : 1198    uc_Thaumarchaeota     : 1023  
##  Thaumarchaeota        : 1023    Phaeocystis antarctica:  818  
##  Phaeocystis antarctica:  818    uc_Proteobacteria     :  740  
##  Proteobacteria        :  740    (Other)               :16892  
##  (Other)               :18118    NA's                  : 1198  
##             Genus_uniref             Family_uniref              Order_uniref  
##  unknown          :14592   unknown          :12382   unknown          :10929  
##  uc_Bacteria      : 1458   uc_Bacteria      : 1458   Rhodobacterales  : 2119  
##  uc_Thaumarchaeota: 1023   uc_Thaumarchaeota: 1023   uc_Bacteria      : 1458  
##  Phaeocystis      :  818   Rhodobacteraceae :  988   Flavobacteriales : 1310  
##  uc_Proteobacteria:  740   Phaeocystaceae   :  818   uc_Thaumarchaeota: 1023  
##  (Other)          :10058   (Other)          :12020   (Other)          :11850  
##  NA's             : 1198   NA's             : 1198   NA's             : 1198  
##               Class_uniref          Phylum_uniref        Domain_uniref  
##  unknown            :10746   Proteobacteria:9810   unknown      :25944  
##  Alphaproteobacteria: 5259   unknown       :7936   uc_Bacteria  : 1458  
##  Gammaproteobacteria: 2839   Thaumarchaeota:1635   Viridiplantae:  532  
##  uc_Bacteria        : 1458   Bacteroidetes :1594   uc_Eukaryota :  223  
##  Flavobacteriia     : 1311   uc_Bacteria   :1458   uc_Archaea   :  185  
##  (Other)            : 7076   (Other)       :6256   (Other)      :  347  
##  NA's               : 1198   NA's          :1198   NA's         : 1198  
##     Domain_gtdb             Phylum_gtdb                   Class_gtdb  
##   Archaea : 3225    Pseudomonadota:15692    Alphaproteobacteria:8665  
##   Bacteria:26048    Thermoproteota: 2449    Gammaproteobacteria:6901  
##  NA's     :  614    Bacteroidota  : 2141    Nitrososphaeria    :2448  
##                     Nitrospinota  : 1418    Bacteroidia        :2122  
##                     Actinomycetota: 1162    Nitrospinia        :1417  
##                    (Other)        : 4969   (Other)             :6116  
##                    NA's           : 2056   NA's                :2218  
##               Order_gtdb                Family_gtdb   
##   Rhodobacterales  : 4618    Rhodobacteraceae : 4618  
##   Pseudomonadales  : 2945    Nitrosopumilaceae: 2448  
##   Nitrososphaerales: 2448    VA-1             : 1416  
##   Nitrospinales    : 1417    Alteromonadaceae : 1147  
##   Flavobacteriales : 1404    Flavobacteriaceae:  903  
##  (Other)           :13951   (Other)           :15985  
##  NA's              : 3104   NA's              : 3370  
##              Genus_gtdb                         Species_gtdb  
##   Planktomarina   : 1510    Psychrobacter sp000511655 :  571  
##   Nitrosopelagicus: 1364    Planktomarina sp028822275 :  499  
##   Nitrosopumilus  : 1081    Octadecabacter sp028825445:  451  
##   Nitromaritima   :  785    Planktomarina sp905182195 :  446  
##   Psychrobacter   :  602    GCA-2721545 sp905612245   :  408  
##  (Other)          :20720   (Other)                    :22738  
##  NA's             : 3825   NA's                       : 4774
# Finally, switch to AGC level (which will oversimplify but for visuals it's the only option)
AGC_Taxo_Annot = Genes_Taxo_Annot %>% select(c(AGC_ID,"Species_uniref","Genus_uniref","Family_uniref","Order_uniref",
                                               "Class_uniref","Phylum_uniref","Domain_uniref","Domain_gtdb","Phylum_gtdb",
                                               "Class_gtdb","Order_gtdb","Family_gtdb","Genus_gtdb","Species_gtdb"))
Mode <- function(x) {
  ux <- na.omit(unique(x))
  ux[which.max(tabulate(match(x, ux)))]
}

AGC_Taxo_Annot = AGC_Taxo_Annot %>% 
  group_by(AGC_ID) %>%
  summarise_all(Mode)

summary(AGC_Taxo_Annot)
##           AGC_ID                         Species_uniref      Genus_uniref
##  AGC_10001997:   1   unknown                    : 271   unknown    :887  
##  AGC_10048680:   1   Phaeocystis antarctica     :  98   Phaeocystis: 97  
##  AGC_10052113:   1   uc_Bacteria                :  54   Aureococcus: 49  
##  AGC_1008101 :   1   Aureococcus anophagefferens:  50   uc_Bacteria: 45  
##  AGC_10118919:   1   Euryarchaeota archaeon     :  49   Bathycoccus: 31  
##  AGC_1012388 :   1   (Other)                    :1539   (Other)    :952  
##  (Other)     :2142   NA's                       :  87   NA's       : 87  
##            Family_uniref            Order_uniref               Class_uniref
##  unknown          : 713   unknown         : 553   unknown            :626  
##  Phaeocystaceae   :  97   Flavobacteriales: 154   Alphaproteobacteria:273  
##  Flavobacteriaceae:  96   Rhodobacterales :  98   Gammaproteobacteria:185  
##  uc_Bacteria      :  48   Phaeocystales   :  97   Flavobacteriia     :152  
##  Alteromonadaceae :  42   Mamiellales     :  75   Mamiellophyceae    : 75  
##  (Other)          :1065   (Other)         :1084   (Other)            :750  
##  NA's             :  87   NA's            :  87   NA's               : 87  
##         Phylum_uniref       Domain_uniref     Domain_gtdb  
##  Proteobacteria:598   unknown      :1787    Archaea :  64  
##  unknown       :314   Viridiplantae:  93    Bacteria:1977  
##  Bacteroidetes :181   Metazoa      :  67   NA's     : 107  
##  Haptophyta    :181   uc_Bacteria  :  41                   
##  Chlorophyta   : 79   Fungi        :  28                   
##  (Other)       :708   (Other)      :  45                   
##  NA's          : 87   NA's         :  87                   
##              Phylum_gtdb                 Class_gtdb              Order_gtdb  
##   Pseudomonadota   :858    Gammaproteobacteria:434    Rhodobacterales : 159  
##   Bacteroidota     :225    Alphaproteobacteria:400    Flavobacteriales: 152  
##   Actinomycetota   :201    Bacteroidia        :224    Burkholderiales : 134  
##   Verrucomicrobiota: 88    Actinomycetes      :181    Pseudomonadales : 120  
##   Planctomycetota  : 81    Verrucomicrobiae   : 86    Enterobacterales: 114  
##  (Other)           :401   (Other)             :513   (Other)          :1094  
##  NA's              :294   NA's                :310   NA's             : 375  
##              Family_gtdb               Genus_gtdb  
##   Rhodobacteraceae : 162    Alteromonas     :  65  
##   Flavobacteriaceae: 108    Streptomyces    :  53  
##   Alteromonadaceae :  92    Planktomarina   :  37  
##   Burkholderiaceae :  63    GCA-2721545     :  24  
##   Streptomycetaceae:  53    Nitrosopelagicus:  24  
##  (Other)           :1280   (Other)          :1539  
##  NA's              : 390   NA's             : 406  
##                      Species_gtdb 
##   Alteromonas alba         :  47  
##   GCA-2721545 sp905612245  :  23  
##   Psychrobacter sp000511655:  20  
##   Planktomarina sp028822275:  16  
##   Sulfuritalea sp022843505 :  16  
##  (Other)                   :1558  
##  NA's                      : 468
# Add functional information, pre-defined as the mode of all annotations within the AGC (not taking only into account nickel-related genes/contigs)
AGC_function <- fread("/Users/emifaure/Documents/ACE/NolwennCollab/Post_Review/Annotations_AGC_Metallo_AGCLevel.tsv", data.table=F)
names(AGC_function)=c("AGC_ID","AGC_Rep","AGC_Size","AGC_Cat","Singl_Cat","KEGG","seed_ortholog","eggNOG_OGs","narr_OG_name","narr_OG_cat","narr_OG_desc","best_OG_name","best_OG_cat","best_OG_desc","Preferred_name","CAZy","BiGG_Reaction","PFAMs")
AGC_Taxo_Annot <- merge(AGC_function, AGC_Taxo_Annot, by="AGC_ID")

AGC_Taxo_Annot<-as.data.frame(AGC_Taxo_Annot)

7.1 RDA small fraction

# Multivariate analysis
#. I- FL

# We'll try to compute an RDA to determine how much of cluster abundance is driven by Nickel
meta_nickel_FL_rda = select(meta_nickel_FL,c("ACE_seq_name","Ni_60_58_D_DELTA_BOTTLE","Ni_D_CONC_BOTTLE"))
row.names(meta_nickel_FL_rda)=meta_nickel_FL_rda$ACE_seq_name
meta_nickel_FL_rda=select(meta_nickel_FL_rda,-c("ACE_seq_name"))
meta_nickel_FL_rda=data.frame(scale(meta_nickel_FL_rda))

# Get rid of the AGC present in only 1 sample
GeneMat_Nickel_FL <- GeneMat_Nickel_FL[,-which(colSums(GeneMat_Nickel_FL>0)<2)]

# IF USING non transformed data, need to transform the abundances to deal with compositionality
#GeneMat_Nickel_FL <- decostand(GeneMat_Nickel_FL, method="rclr")

# Match the row order
GeneMat_Nickel_FL_rda=GeneMat_Nickel_FL[match(row.names(meta_nickel_FL_rda),row.names(GeneMat_Nickel_FL)),]

RDA=rda(GeneMat_Nickel_FL_rda ~ ., data = meta_nickel_FL_rda)
#summary(RDA)
RsquareAdj(RDA) #16.9%
## $r.squared
## [1] 0.2261923
## 
## $adj.r.squared
## [1] 0.1688732
anova.cca(RDA) #Significant
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = GeneMat_Nickel_FL_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_FL_rda)
##          Df Variance      F Pr(>F)    
## Model     2   85.907 3.9462  0.001 ***
## Residual 27  293.889                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova.cca(RDA, by="axis") # Both significant
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = GeneMat_Nickel_FL_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_FL_rda)
##          Df Variance      F Pr(>F)   
## RDA1      1   51.492 4.7307  0.002 **
## RDA2      1   34.415 3.1617  0.005 **
## Residual 27  293.889                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
scores=scoresRDA(RDA)
## 
## -----------------------------------------------------------------------
## Site constraints (lc) selected. To obtain site scores that are weighted
## sums of species scores (default in vegan), argument site.sc must be set
## to wa.
## -----------------------------------------------------------------------
samples = scores$Sites
samples = as.data.frame(samples)
samples = merge(samples,meta_nickel_FL, by.x="row.names", by.y="ACE_seq_name")
row.names(samples)=samples[,1]
samples=samples[,-1]

# Retrieve AGC scores
AGCScore <- scores$Species
AGCScore = as.data.frame(AGCScore)
# Keep only scores away from center
AGCScore_select = AGCScore[which(AGCScore$RDA1>0.75 | AGCScore$RDA1<(-0.75) | AGCScore$RDA2>0.75 | AGCScore$RDA2<(-0.75)),]

enviscore=scores$Biplot

#Plot the triplot
ggplot() +   geom_hline(yintercept = 0, linetype='dotted') +
  geom_vline(xintercept = 0, linetype='dotted') +
  labs(x = paste0("RDA1 (",
                  round(100 * scores$Eigenval.rel[1], 1), "%)"),
       y = paste0("RDA2 (",
                  round(100 * scores$Eigenval.rel[2], 1), "%)")) +
  theme(plot.title=element_text(hjust=0.5)) +
  geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
  geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
  geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
  geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5, linetype="F1",color = 'orange',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="orange") +
  geom_text(aes(AGCScore_select[,1]*1.05, AGCScore_select[,2]*1.05, label=rownames(AGCScore_select)), color="indianred2") +
  labs(col = expression(delta^60 * Ni ~ "(‰)")) +
  geom_segment(data=as.data.frame(AGCScore_select),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm"))) +
  theme_bw() 

# A pack of AGC is associated to station 11
MertzAGC <- AGCScore[which(AGCScore$RDA1>0.5 & AGCScore$RDA2>(-0.5) & AGCScore$RDA2<(0)),]
MertzAGC <- MertzAGC[order(MertzAGC$RDA2, decreasing = F),]
MertzAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
MertzAGCannot <- MertzAGCannot[match(row.names(MertzAGC), MertzAGCannot$AGC_ID),]
MertzAGCannot
##            AGC_ID                         AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 517  AGC_17036882  ACE_G151_000000191440_20051104      149       K          
## 327  AGC_14355128 ACE_G170_000000261377_170136783      444       K          
## 781  AGC_20916161  ACE_G22_000000108730_163065181      279       K          
## 593   AGC_1819471  ACE_G23_000000163918_128019073      120     KWP          
## 925  AGC_22886549  ACE_G105_000000212698_61593594      101       K          
## 1469  AGC_3062244 ACE_G110_000000183616_158277234       12      GU          
## 1680  AGC_5999524  ACE_G151_000000097443_46338114       32       K          
## 25   AGC_10331396  ACE_G103_000000165822_96587828       22     KWP          
## 679  AGC_19486454  ACE_G119_000000026685_44592785       70       K          
## 1352 AGC_28603842 ACE_G196_000000063741_153664976       14       K          
## 610  AGC_18521265  ACE_G158_000000041593_81730025       40       K          
## 56    AGC_1069464  ACE_G109_000000000736_79295090       15       K          
## 371  AGC_14931481  ACE_G27_000000050805_128184360       42       K          
##        KEGG                 seed_ortholog
## 517         1443665.JACA01000026_gene3529
## 327                    1137799.GZ78_13105
## 781                    491952.Mar181_2423
## 593                                      
## 925  K04564              472759.Nhal_2272
## 1469                313595.P700755_001745
## 1680 K04564             867900.Celly_1246
## 25                  313595.P700755_001745
## 679                                      
## 1352 K04564            313594.PI23P_02707
## 610                    313594.PI23P_07510
## 56   K03187          391626.OAN307_c42250
## 371  K04564  686578.AFFX01000004_gene3555
##                                                                                                                 eggNOG_OGs
## 517          COG1573@1|root,COG1573@2|Bacteria,4NG84@976|Bacteroidetes,1HXRH@117743|Flavobacteriia,2YHMM@290174|Aquimarina
## 327  293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,1XJID@135619|Oceanospirillales
## 781  293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,1XJID@135619|Oceanospirillales
## 593                                                                                                                       
## 925   COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,1RP7X@1236|Gammaproteobacteria,1WWRN@135613|Chromatiales
## 1469       COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 1680                                 COG0605@1|root,COG0605@2|Bacteria,4NDZ4@976|Bacteroidetes,1HX6F@117743|Flavobacteriia
## 25         COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 679                                  COG2032@1|root,COG2032@2|Bacteria,4NM88@976|Bacteroidetes,1I17T@117743|Flavobacteriia
## 1352        COG0605@1|root,COG0605@2|Bacteria,4NDZ4@976|Bacteroidetes,1HX6F@117743|Flavobacteriia,3VVG2@52959|Polaribacter
## 610                                  COG2032@1|root,COG2032@2|Bacteria,4NM88@976|Bacteroidetes,1I17T@117743|Flavobacteriia
## 56                             COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria
## 371                             COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,1RP7X@1236|Gammaproteobacteria
##                         narr_OG_name narr_OG_cat
## 517          2YHMM@290174|Aquimarina           L
## 327   1XJID@135619|Oceanospirillales           S
## 781   1XJID@135619|Oceanospirillales           S
## 593                                            L
## 925        1WWRN@135613|Chromatiales           P
## 1469       4C3V1@83612|Psychroflexus           P
## 1680     1HX6F@117743|Flavobacteriia           P
## 25         4C3V1@83612|Psychroflexus           P
## 679      1I17T@117743|Flavobacteriia           P
## 1352        3VVG2@52959|Polaribacter           C
## 610      1I17T@117743|Flavobacteriia           P
## 56   2TVBW@28211|Alphaproteobacteria           O
## 371   1RP7X@1236|Gammaproteobacteria           C
##                                                                                                                                                   narr_OG_desc
## 517                                                                                                                                     Uracil-DNA glycosylase
## 327                                                                                                                                       Superoxide dismutase
## 781                                                                                                                                       Superoxide dismutase
## 593  Excises uracil residues from the DNA which can arise as a result of misincorporation of dUMP residues by DNA polymerase or due to deamination of cytosine
## 925                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1469                                                                                                                             superoxide dismutase activity
## 1680                                                  Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 25                                                                                                                               superoxide dismutase activity
## 679                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1352                                                  Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 610                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 56                                 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 371                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
##                         best_OG_name best_OG_cat
## 517  2TSWU@28211|Alphaproteobacteria           L
## 327   1XJID@135619|Oceanospirillales           S
## 781   1XJID@135619|Oceanospirillales           S
## 593          4NE2B@976|Bacteroidetes           L
## 925        1WWRN@135613|Chromatiales           C
## 1469         4NGGW@976|Bacteroidetes           P
## 1680         4NDZ4@976|Bacteroidetes           P
## 25           4NGGW@976|Bacteroidetes           P
## 679          4NM88@976|Bacteroidetes           P
## 1352         4NDZ4@976|Bacteroidetes           P
## 610          4NM88@976|Bacteroidetes           P
## 56   2TVBW@28211|Alphaproteobacteria           O
## 371   1RP7X@1236|Gammaproteobacteria           C
##                                                                                                                                                   best_OG_desc
## 517                                                                                                                                     Uracil-DNA glycosylase
## 327                                                                                                                                       Superoxide dismutase
## 781                                                                                                                                       Superoxide dismutase
## 593  Excises uracil residues from the DNA which can arise as a result of misincorporation of dUMP residues by DNA polymerase or due to deamination of cytosine
## 925                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1469                                                                                                                                   Pfam Secreted repeat of
## 1680                                                  Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 25                                                                                                                                     Pfam Secreted repeat of
## 679                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1352                                                  Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 610                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 56                                 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 371                                                   Destroys radicals which are normally produced within the cells and which are toxic to biological systems
##      Preferred_name CAZy
## 517               -    -
## 327               -    -
## 781               -    -
## 593             ung    -
## 925               -    -
## 1469              -    -
## 1680           sodA    -
## 25                -    -
## 679            sodC    -
## 1352           sodA    -
## 610            sodC    -
## 56             ureE    -
## 371            sodB    -
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          BiGG_Reaction
## 517                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 327                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 781                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 593                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 925                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 1469                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 -
## 1680                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 -
## 25                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   -
## 679                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 1352                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 -
## 610                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  -
## 56                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   -
## 371  iAF1260.b1656,iAPECO1_1312.APECO1_736,iB21_1397.B21_01616,iBWG_1329.BWG_1471,iE2348C_1286.E2348C_1742,iEC042_1314.EC042_1825,iEC55989_1330.EC55989_1824,iECABU_c1320.ECABU_c19090,iECBD_1354.ECBD_1987,iECB_1328.ECB_01626,iECDH10B_1368.ECDH10B_1790,iECDH1ME8569_1439.ECDH1ME8569_1601,iECD_1391.ECD_01626,iECED1_1282.ECED1_1855,iECH74115_1262.ECH74115_2368,iECIAI1_1343.ECIAI1_1708,iECO103_1326.ECO103_1797,iECO111_1330.ECO111_2126,iECO26_1355.ECO26_2385,iECOK1_1307.ECOK1_1775,iECP_1309.ECP_1601,iECS88_1305.ECS88_1705,iECSE_1348.ECSE_1780,iECSP_1301.ECSP_2222,iECUMN_1333.ECUMN_1946,iECW_1372.ECW_m1823,iECs_1301.ECs2365,iEKO11_1354.EKO11_2118,iETEC_1333.ETEC_1691,iEcDH1_1363.EcDH1_1984,iEcE24377_1341.EcE24377A_1869,iEcHS_1320.EcHS_A1735,iEcSMS35_1347.EcSMS35_1542,iEcolC_1368.EcolC_1973,iG2583_1286.G2583_2051,iJO1366.b1656,iJR904.b1656,iLF82_1304.LF82_2148,iNRG857_1313.NRG857_08300,iSBO_1134.SBO_1475,iSDY_1059.SDY_1882,iSFV_1184.SFV_1678,iSF_1195.SF1684,iSFxv_1172.SFxv_1892,iSSON_1240.SSON_1500,iS_1188.S1816,iSbBS512_1146.SbBS512_E1853,iUMN146_1321.UM146_08870,iUMNK88_1353.UMNK88_2117,iUTI89_1310.UTI89_C1847,iWFL_1372.ECW_m1823,iY75_1357.Y75_RS08680,iZ_1308.Z2678,ic_1306.c2050
##                  PFAMs                 Species_uniref   Genus_uniref
## 517                UDG                 uc_Nitrospinae uc_Nitrospinae
## 327             Sod_Ni           uc_Flavobacteriaceae   Polaribacter
## 781             Sod_Ni                        unknown        unknown
## 593                UDG    Flavobacteriaceae bacterium        unknown
## 925  Sod_Fe_C,Sod_Fe_N     Glaciecola nitratireducens        unknown
## 1469              CHRD          Polaribacter irgensii  Psychroflexus
## 1680 Sod_Fe_C,Sod_Fe_N     Flavobacteriales bacterium        unknown
## 25                CHRD     Psychroflexus gondwanensis  Psychroflexus
## 679             Sod_Cu unidentified eubacterium SCB49        unknown
## 1352 Sod_Fe_C,Sod_Fe_N                uc_Polaribacter   Polaribacter
## 610             Sod_Cu               Polaribacter sp.   Polaribacter
## 56       UreE_C,UreE_N                        unknown        unknown
## 371  Sod_Fe_C,Sod_Fe_N                        unknown        unknown
##          Family_uniref     Order_uniref        Class_uniref  Phylum_uniref
## 517     uc_Nitrospinae  Rhodobacterales Alphaproteobacteria    Nitrospinae
## 327  Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 781            unknown          unknown             unknown        unknown
## 593  Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 925            unknown  Alteromonadales Gammaproteobacteria Proteobacteria
## 1469 Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 1680           unknown Flavobacteriales      Flavobacteriia  Bacteroidetes
## 25   Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 679  Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 1352 Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 610  Flavobacteriaceae Flavobacteriales      Flavobacteriia  Bacteroidetes
## 56             unknown          unknown             unknown        unknown
## 371            unknown          unknown             unknown        unknown
##      Domain_uniref Domain_gtdb     Phylum_gtdb           Class_gtdb
## 517        unknown    Bacteria    Nitrospinota          Nitrospinia
## 327        unknown    Bacteria    Bacteroidota          Bacteroidia
## 781        unknown    Bacteria  Pseudomonadota  Gammaproteobacteria
## 593        unknown    Bacteria    Bacteroidota          Bacteroidia
## 925        unknown    Bacteria  Pseudomonadota  Gammaproteobacteria
## 1469       unknown    Bacteria    Bacteroidota          Bacteroidia
## 1680       unknown    Bacteria    Bacteroidota          Bacteroidia
## 25         unknown    Bacteria    Bacteroidota          Bacteroidia
## 679        unknown    Bacteria    Bacteroidota          Bacteroidia
## 1352       unknown    Bacteria    Bacteroidota          Bacteroidia
## 610        unknown    Bacteria    Bacteroidota          Bacteroidia
## 56         unknown    Bacteria  Pseudomonadota  Alphaproteobacteria
## 371        unknown    Bacteria  Pseudomonadota  Gammaproteobacteria
##             Order_gtdb        Family_gtdb       Genus_gtdb
## 517      Nitrospinales               VA-1          UBA8687
## 327   Flavobacteriales  Flavobacteriaceae  Winogradskyella
## 781    Pseudomonadales     Porticoccaceae         HTCC2207
## 593   Flavobacteriales  Flavobacteriaceae           SGZJ01
## 925      GCA-001735895      GCA-001735895    GCA-001735895
## 1469  Flavobacteriales  Flavobacteriaceae     Polaribacter
## 1680  Flavobacteriales  Flavobacteriaceae        UWMA-0277
## 25    Flavobacteriales  Flavobacteriaceae     Polaribacter
## 679   Flavobacteriales  Flavobacteriaceae    GCA-002733185
## 1352  Flavobacteriales  Flavobacteriaceae     Polaribacter
## 610   Flavobacteriales  Flavobacteriaceae     Polaribacter
## 56     Rhodobacterales   Rhodobacteraceae   Octadecabacter
## 371    Pseudomonadales   Pseudomonadaceae  Halopseudomonas
##                      Species_gtdb
## 517           UBA8687 sp013349585
## 327   Winogradskyella sp002364335
## 781          HTCC2207 sp913031645
## 593            SGZJ01 sp028825365
## 925     GCA-001735895 sp913061315
## 1469        Polaribacter irgensii
## 1680        UWMA-0277 sp905612305
## 25          Nonlabens sp012974865
## 679     GCA-002733185 sp030591585
## 1352     Polaribacter sp913050305
## 610      Polaribacter sp003537695
## 56     Octadecabacter sp028825445
## 371    Halopseudomonas neustonica
MertzGenes <- Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
#MertzGenes

# Check the taxo within those that are dominated by unknown
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_20916161"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_1069464"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14931481"),]

# 3 others are correlated to dNi yet in between station 11 and 8
DNiAGC <- AGCScore[which(AGCScore$RDA1>0.5 & AGCScore$RDA2<(-0.5)),]
DNiAGC <- DNiAGC[order(DNiAGC$RDA1, decreasing = T),]
DNiAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(DNiAGC)),]
DNiAGCannot[match(row.names(DNiAGC), DNiAGCannot$AGC_ID),]
##           AGC_ID                         AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 354 AGC_14687886 ACE_G104_000000029829_114121823      154       K          
## 342 AGC_14548656  ACE_G143_000000003166_98493499       94       K          
## 939 AGC_23057241  ACE_G125_000000206004_18635940       72       K          
##       KEGG       seed_ortholog
## 354 K03187   1342299.Z947_2243
## 342 K03188 391616.OA238_c03380
## 939 K03190 666509.RCA23_c08820
##                                                                                                                eggNOG_OGs
## 354 COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria,3ZXMP@60136|Sulfitobacter
## 342                           COG0830@1|root,COG0830@2|Bacteria,1MW8Q@1224|Proteobacteria,2TRVW@28211|Alphaproteobacteria
## 939                           COG0829@1|root,COG0829@2|Bacteria,1RABD@1224|Proteobacteria,2U57R@28211|Alphaproteobacteria
##                        narr_OG_name narr_OG_cat
## 354       3ZXMP@60136|Sulfitobacter           O
## 342 2TRVW@28211|Alphaproteobacteria           O
## 939 2U57R@28211|Alphaproteobacteria           O
##                                                                                                                    narr_OG_desc
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 342                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 939                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
##                        best_OG_name best_OG_cat
## 354 2TVBW@28211|Alphaproteobacteria           O
## 342 2TRVW@28211|Alphaproteobacteria           O
## 939 2U57R@28211|Alphaproteobacteria           O
##                                                                                                                    best_OG_desc
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 342                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 939                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
##     Preferred_name CAZy BiGG_Reaction         PFAMs             Species_uniref
## 354           ureE    -             - UreE_C,UreE_N                    unknown
## 342           ureF    -             -          UreF                    unknown
## 939           ureD    -             -          UreD Rhodobacteraceae bacterium
##     Genus_uniref    Family_uniref    Order_uniref        Class_uniref
## 354      unknown          unknown         unknown             unknown
## 342      unknown          unknown         unknown             unknown
## 939      unknown Rhodobacteraceae Rhodobacterales Alphaproteobacteria
##      Phylum_uniref Domain_uniref Domain_gtdb     Phylum_gtdb
## 354        unknown       unknown    Bacteria  Pseudomonadota
## 342 Proteobacteria       unknown    Bacteria  Pseudomonadota
## 939 Proteobacteria       unknown    Bacteria  Pseudomonadota
##               Class_gtdb       Order_gtdb       Family_gtdb          Genus_gtdb
## 354  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae              Yoonia
## 342  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae      Octadecabacter
## 939  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae  Ascidiaceihabitans
##                        Species_gtdb
## 354              Yoonia sp023955335
## 342      Octadecabacter sp028825445
## 939  Ascidiaceihabitans sp905182355
DNiAGCannot
##           AGC_ID                         AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 342 AGC_14548656  ACE_G143_000000003166_98493499       94       K          
## 354 AGC_14687886 ACE_G104_000000029829_114121823      154       K          
## 939 AGC_23057241  ACE_G125_000000206004_18635940       72       K          
##       KEGG       seed_ortholog
## 342 K03188 391616.OA238_c03380
## 354 K03187   1342299.Z947_2243
## 939 K03190 666509.RCA23_c08820
##                                                                                                                eggNOG_OGs
## 342                           COG0830@1|root,COG0830@2|Bacteria,1MW8Q@1224|Proteobacteria,2TRVW@28211|Alphaproteobacteria
## 354 COG2371@1|root,COG2371@2|Bacteria,1MZQZ@1224|Proteobacteria,2TVBW@28211|Alphaproteobacteria,3ZXMP@60136|Sulfitobacter
## 939                           COG0829@1|root,COG0829@2|Bacteria,1RABD@1224|Proteobacteria,2U57R@28211|Alphaproteobacteria
##                        narr_OG_name narr_OG_cat
## 342 2TRVW@28211|Alphaproteobacteria           O
## 354       3ZXMP@60136|Sulfitobacter           O
## 939 2U57R@28211|Alphaproteobacteria           O
##                                                                                                                    narr_OG_desc
## 342                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 939                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
##                        best_OG_name best_OG_cat
## 342 2TRVW@28211|Alphaproteobacteria           O
## 354 2TVBW@28211|Alphaproteobacteria           O
## 939 2U57R@28211|Alphaproteobacteria           O
##                                                                                                                    best_OG_desc
## 342                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
## 354 Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 939                       Required for maturation of urease via the functional incorporation of the urease nickel metallocenter
##     Preferred_name CAZy BiGG_Reaction         PFAMs             Species_uniref
## 342           ureF    -             -          UreF                    unknown
## 354           ureE    -             - UreE_C,UreE_N                    unknown
## 939           ureD    -             -          UreD Rhodobacteraceae bacterium
##     Genus_uniref    Family_uniref    Order_uniref        Class_uniref
## 342      unknown          unknown         unknown             unknown
## 354      unknown          unknown         unknown             unknown
## 939      unknown Rhodobacteraceae Rhodobacterales Alphaproteobacteria
##      Phylum_uniref Domain_uniref Domain_gtdb     Phylum_gtdb
## 342 Proteobacteria       unknown    Bacteria  Pseudomonadota
## 354        unknown       unknown    Bacteria  Pseudomonadota
## 939 Proteobacteria       unknown    Bacteria  Pseudomonadota
##               Class_gtdb       Order_gtdb       Family_gtdb          Genus_gtdb
## 342  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae      Octadecabacter
## 354  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae              Yoonia
## 939  Alphaproteobacteria  Rhodobacterales  Rhodobacteraceae  Ascidiaceihabitans
##                        Species_gtdb
## 342      Octadecabacter sp028825445
## 354              Yoonia sp023955335
## 939  Ascidiaceihabitans sp905182355
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14687886"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_14548656"),]

# 24 are associated to station 8
ST8AGC <- AGCScore[which((AGCScore$RDA1<(0.5) & AGCScore$RDA1>(0) & AGCScore$RDA2<(-0.5)) | (AGCScore$RDA1<(0) & AGCScore$RDA1>(-0.5) & AGCScore$RDA2<(-0.75))),]
ST8AGC <- ST8AGC[order(ST8AGC$RDA1, decreasing = T),]
ST8AGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(ST8AGC)),]
ST8AGCannot <- ST8AGCannot[match(row.names(ST8AGC), ST8AGCannot$AGC_ID),]
ST8AGCannot
##            AGC_ID                         AGC_Rep AGC_Size AGC_Cat Singl_Cat
## 964  AGC_23334844   ACE_G66_000000022940_50631213       54       K          
## 1804  AGC_7792447   ACE_G36_000000069010_49679286     2463       K          
## 362  AGC_14840044  ACE_G92_000000082546_139792161      852       K          
## 298  AGC_13989203  ACE_G123_000000257491_88665121      275       K          
## 88   AGC_11093182  ACE_G80_000000112913_174111159       20       K          
## 836   AGC_2164110 ACE_G157_000000108833_151819534      169       K          
## 544  AGC_17472745  ACE_G15_000000053663_108120463      165       K          
## 618  AGC_18601609 ACE_G173_000000057257_152747621       31       K          
## 1081 AGC_24807388 ACE_G188_000000079517_144668771      114       K          
## 588  AGC_18108257 ACE_G159_000000214533_108087016       29       K          
## 1837  AGC_8142656  ACE_G174_000000515234_74005539       22       K          
## 1169 AGC_25996182  ACE_G194_000000189051_74736089       57       K          
## 1183 AGC_26196954    ACE_G48_000000219824_6215485       31       K          
## 1419 AGC_29397748  ACE_G172_000000569973_65083237      240       K          
## 1542  AGC_4137832  ACE_G173_000000327145_47585756       45      GU          
## 1375 AGC_28890756  ACE_G93_000000207684_131098583       25      GU          
## 1504  AGC_3526244   ACE_G86_000000312768_16686896       21      GU          
## 730  AGC_20290509  ACE_G86_000000265023_148188009       18      GU          
## 1290 AGC_27773751  ACE_G89_000000251289_113307982       20      GU          
## 1633  AGC_5500562 ACE_G159_000000163877_143139768       40      GU          
## 650  AGC_19008763   ACE_G175_000000374355_3929490       57     KWP          
## 822  AGC_21493453  ACE_G28_000000006429_101950529       71     KWP          
## 100  AGC_11214517  ACE_G92_000000395364_122293427       38      GU          
## 250  AGC_13479537  ACE_G88_000000238137_165809251       67       K          
##        KEGG                 seed_ortholog
## 964         1342301.JASD01000008_gene2370
## 1804 K04564           666509.RCA23_c18480
## 362                  44056.XP_009034551.1
## 298  K04564           666509.RCA23_c18480
## 88   K04565          41875.XP_007508895.1
## 836                1298865.H978DRAFT_2911
## 544                   313596.RB2501_04370
## 618  K04564         296587.XP_002507909.1
## 1081                 44056.XP_009039471.1
## 588                  41875.XP_007509011.1
## 1837 K03187            313625.BL107_06974
## 1169 K03190          41875.XP_007515132.1
## 1183                        2903.EOD39616
## 1419                       243090.RB12634
## 1542                   313594.PI23P_11007
## 1375                 344747.PM8797T_02424
## 1504                   313594.PI23P_11007
## 730                  344747.PM8797T_02424
## 1290                313595.P700755_001745
## 1633                    886377.Murru_2609
## 650                     439493.PB7211_332
## 822         1265756.AWZW01000008_gene1247
## 100                    313594.PI23P_11007
## 250          1131266.ARWQ01000023_gene385
##                                                                                                                                                                                                       eggNOG_OGs
## 964                                                                                        COG0378@1|root,COG0378@2|Bacteria,1MVBD@1224|Proteobacteria,2TQMU@28211|Alphaproteobacteria,3ZWJ9@60136|Sulfitobacter
## 1804                                                                                                                 COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria
## 362                                                                                                                                                                        COG5272@1|root,KOG0001@2759|Eukaryota
## 298                                                                                                                  COG0605@1|root,COG0605@2|Bacteria,1MVW2@1224|Proteobacteria,2TU3T@28211|Alphaproteobacteria
## 88                                                                                                                                               COG2032@1|root,KOG0441@2759|Eukaryota,37Q1U@33090|Viridiplantae
## 836                                                                                          293MU@1|root,2ZR3M@2|Bacteria,1REEY@1224|Proteobacteria,1S3UE@1236|Gammaproteobacteria,46B1B@72275|Alteromonadaceae
## 544                                                                                                                        COG0374@1|root,COG0374@2|Bacteria,4NJIP@976|Bacteroidetes,1I3BR@117743|Flavobacteriia
## 618                                                                                                                       COG0605@1|root,KOG0876@2759|Eukaryota,37MWT@33090|Viridiplantae,34HM7@3041|Chlorophyta
## 1081                                                                                                                                                                         COG0804@1|root,2QPKB@2759|Eukaryota
## 588                                                                                                                                              COG2608@1|root,KOG4656@2759|Eukaryota,37QGH@33090|Viridiplantae
## 1837                                                                                                                         COG2371@1|root,COG2371@2|Bacteria,1G6TF@1117|Cyanobacteria,1H0TA@1129|Synechococcus
## 1169                                                                                                                                               COG0138@1|root,2QSQN@2759|Eukaryota,37QE5@33090|Viridiplantae
## 1183                                                                                                                                                                           2A42S@1|root,2S3XN@2759|Eukaryota
## 1419                                                                                                                                                                               2A42S@1|root,30SMP@2|Bacteria
## 1542                                                                                              COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 1375 COG1404@1|root,COG2931@1|root,COG3209@1|root,COG3210@1|root,COG4932@1|root,COG5276@1|root,COG1404@2|Bacteria,COG2931@2|Bacteria,COG3209@2|Bacteria,COG3210@2|Bacteria,COG4932@2|Bacteria,COG5276@2|Bacteria
## 1504                                                                                              COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 730  COG1404@1|root,COG2931@1|root,COG3209@1|root,COG3210@1|root,COG4932@1|root,COG5276@1|root,COG1404@2|Bacteria,COG2931@2|Bacteria,COG3209@2|Bacteria,COG3210@2|Bacteria,COG4932@2|Bacteria,COG5276@2|Bacteria
## 1290                                                                                             COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,4C3V1@83612|Psychroflexus
## 1633                                                                                                                       COG4315@1|root,COG4315@2|Bacteria,4NI3N@976|Bacteroidetes,1HYWK@117743|Flavobacteriia
## 650                                                                     COG1573@1|root,COG1573@2|Bacteria,1MW8T@1224|Proteobacteria,2TSAR@28211|Alphaproteobacteria,4BT8H@82117|unclassified Alphaproteobacteria
## 822                                                                     COG1573@1|root,COG1573@2|Bacteria,1MW91@1224|Proteobacteria,2TSUP@28211|Alphaproteobacteria,4BQ5P@82117|unclassified Alphaproteobacteria
## 100                                                                                               COG2032@1|root,COG2032@2|Bacteria,4NGGW@976|Bacteroidetes,1HY8P@117743|Flavobacteriia,3VWSB@52959|Polaribacter
## 250                                                                                                                                           COG0831@1|root,arCOG04528@2157|Archaea,41SPI@651137|Thaumarchaeota
##                                                                                                                     narr_OG_name
## 964                                                                                                    3ZWJ9@60136|Sulfitobacter
## 1804                                                                                             2TU3T@28211|Alphaproteobacteria
## 362                                                                                                       KOG0001@2759|Eukaryota
## 298                                                                                              2TU3T@28211|Alphaproteobacteria
## 88                                                                                                     37Q1U@33090|Viridiplantae
## 836                                                                                                 46B1B@72275|Alteromonadaceae
## 544                                                                                                  1I3BR@117743|Flavobacteriia
## 618                                                                                                       34HM7@3041|Chlorophyta
## 1081                                                                                                        2QPKB@2759|Eukaryota
## 588                                                                                                    37QGH@33090|Viridiplantae
## 1837                                                                                                    1H0TA@1129|Synechococcus
## 1169                                                                                                   37QE5@33090|Viridiplantae
## 1183                                                                                                        2S3XN@2759|Eukaryota
## 1419                                                                                                            30SMP@2|Bacteria
## 1542                                                                                                    3VWSB@52959|Polaribacter
## 1375 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1504                                                                                                    3VWSB@52959|Polaribacter
## 730  COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1290                                                                                                   4C3V1@83612|Psychroflexus
## 1633                                                                                                 1HYWK@117743|Flavobacteriia
## 650                                                                                 4BT8H@82117|unclassified Alphaproteobacteria
## 822                                                                                 4BQ5P@82117|unclassified Alphaproteobacteria
## 100                                                                                                     3VWSB@52959|Polaribacter
## 250                                                                                                  41SPI@651137|Thaumarchaeota
##                narr_OG_cat
## 964                     KO
## 1804                     P
## 362                      O
## 298                      C
## 88                       P
## 836                      S
## 544                      C
## 618                      P
## 1081                     E
## 588                      P
## 1837                     O
## 1169                     F
## 1183                     S
## 1419                     S
## 1542                     P
## 1375 O / Q / M / U / M / -
## 1504                     P
## 730  O / Q / M / U / M / -
## 1290                     P
## 1633                     S
## 650                      L
## 822                      L
## 100                      P
## 250                      E
##                                                                                                                                                      narr_OG_desc
## 964               Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 1804                                                     Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 362                                                                                                                      cellular macromolecule catabolic process
## 298                                                      Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 88                                                       Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 836                                                                                                                        Nickel-containing superoxide dismutase
## 544                                                                                                                                     hydrogenase large subunit
## 618                                                      Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1081                                                                                                                                              urease activity
## 588                                                                                                                     Copper chaperone for superoxide dismutase
## 1837                                  Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 1169                                                                                                                                UreD urease accessory protein
## 1183                                                                                                                       Nickel-containing superoxide dismutase
## 1419                                                                                                                       Nickel-containing superoxide dismutase
## 1542                                                                                                                                superoxide dismutase activity
## 1375 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1504                                                                                                                                superoxide dismutase activity
## 730  Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1290                                                                                                                                superoxide dismutase activity
## 1633                                                                                                                          Secreted repeat of unknown function
## 650                                                                                                                            Uracil DNA glycosylase superfamily
## 822                                                                                                                            Uracil DNA glycosylase superfamily
## 100                                                                                                                                 superoxide dismutase activity
## 250                                                                                                                    Belongs to the urease gamma subunit family
##                                                                                                                     best_OG_name
## 964                                                                                              2TQMU@28211|Alphaproteobacteria
## 1804                                                                                             2TU3T@28211|Alphaproteobacteria
## 362                                                                                                       KOG0001@2759|Eukaryota
## 298                                                                                              2TU3T@28211|Alphaproteobacteria
## 88                                                                                                     37Q1U@33090|Viridiplantae
## 836                                                                                               1S3UE@1236|Gammaproteobacteria
## 544                                                                                                      4NJIP@976|Bacteroidetes
## 618                                                                                                       34HM7@3041|Chlorophyta
## 1081                                                                                                        2QPKB@2759|Eukaryota
## 588                                                                                                    37QGH@33090|Viridiplantae
## 1837                                                                                                    1G6TF@1117|Cyanobacteria
## 1169                                                                                                   37QE5@33090|Viridiplantae
## 1183                                                                                                        2S3XN@2759|Eukaryota
## 1419                                                                                                            30SMP@2|Bacteria
## 1542                                                                                                     4NGGW@976|Bacteroidetes
## 1375 COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1504                                                                                                     4NGGW@976|Bacteroidetes
## 730  COG1404@2|Bacteria / COG2931@2|Bacteria / COG3209@2|Bacteria / COG3210@2|Bacteria / COG4932@2|Bacteria / COG5276@2|Bacteria
## 1290                                                                                                     4NGGW@976|Bacteroidetes
## 1633                                                                                                     4NI3N@976|Bacteroidetes
## 650                                                                                              2TSAR@28211|Alphaproteobacteria
## 822                                                                                              2TSUP@28211|Alphaproteobacteria
## 100                                                                                                      4NGGW@976|Bacteroidetes
## 250                                                                                                  41SPI@651137|Thaumarchaeota
##                best_OG_cat
## 964                     KO
## 1804                     P
## 362                      O
## 298                      P
## 88                       P
## 836                      S
## 544                      C
## 618                      P
## 1081                     E
## 588                      P
## 1837                     O
## 1169                     F
## 1183                     S
## 1419                     S
## 1542                     P
## 1375 O / Q / M / U / M / -
## 1504                     P
## 730  O / Q / M / U / M / -
## 1290                     P
## 1633                     M
## 650                      L
## 822                      L
## 100                      P
## 250                      E
##                                                                                                                                                      best_OG_desc
## 964               Facilitates the functional incorporation of the urease nickel metallocenter. This process requires GTP hydrolysis, probably effectuated by UreG
## 1804                                                     Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 362                                                                                                                      cellular macromolecule catabolic process
## 298                                                      Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 88                                                       Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 836                                                                                                                                          Superoxide dismutase
## 544                                                                                                              PFAM Nickel-dependent hydrogenase, large subunit
## 618                                                      Destroys radicals which are normally produced within the cells and which are toxic to biological systems
## 1081                                                                                                                                              urease activity
## 588                                                                                                                     Copper chaperone for superoxide dismutase
## 1837                                  Involved in urease metallocenter assembly. Binds nickel. Probably functions as a nickel donor during metallocenter assembly
## 1169                                                                                                                                     urease accessory protein
## 1183                                                                                                                       Nickel-containing superoxide dismutase
## 1419                                                                                                                       Nickel-containing superoxide dismutase
## 1542                                                                                                                                      Pfam Secreted repeat of
## 1375 Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1504                                                                                                                                      Pfam Secreted repeat of
## 730  Belongs to the peptidase S8 family / calcium- and calmodulin-responsive adenylate cyclase activity / self proteolysis / domain, Protein / domain protein / -
## 1290                                                                                                                                      Pfam Secreted repeat of
## 1633                                                                                                                                      PFAM Secreted repeat of
## 650                                                                                                                                        uracil-DNA glycosylase
## 822                                                                                                                                        uracil-DNA glycosylase
## 100                                                                                                                                       Pfam Secreted repeat of
## 250                                                                                                                    Belongs to the urease gamma subunit family
##      Preferred_name    CAZy BiGG_Reaction
## 964            ureG       -             -
## 1804           sodB       -             -
## 362               -       -             -
## 298            sodB       -             -
## 88            SODCP       -             -
## 836               -       -             -
## 544               -       -             -
## 618             MSD       -             -
## 1081           URE1       -             -
## 588             CCS       -             -
## 1837           ureE       -             -
## 1169              -       -             -
## 1183              -       -             -
## 1419              -       -             -
## 1542              -       -             -
## 1375              - GH5,GH9             -
## 1504              -       -             -
## 730               - GH5,GH9             -
## 1290              -       -             -
## 1633              -       -             -
## 650      MA20_15960       -             -
## 822             ung       -             -
## 100               -       -             -
## 250            ureA       -             -
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        PFAMs
## 964                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     cobW
## 1804                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       Sod_Fe_C,Sod_Fe_N
## 362                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                ubiquitin
## 298                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Sod_Fe_C,Sod_Fe_N
## 88                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    Sod_Cu
## 836                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Sod_Ni
## 544                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             NiFeSe_Hases
## 618                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        Sod_Fe_C,Sod_Fe_N
## 1081                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                          Amidohydro_1,DUF599,Fungal_trans,HMG_box,Urease_alpha,Urease_beta,Urease_gamma
## 588                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               HMA,Sod_Cu
## 1837                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                           UreE_C,UreE_N
## 1169                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               UreD,UreF
## 1183                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Sod_Ni
## 1419                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                  Sod_Ni
## 1542                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    CHRD
## 1375 5_nucleotid_C,ASH,Autotransporter,Beta_helix,Big_2,Big_3_2,Big_3_3,Big_3_5,Big_5,CARDB,CBM60,CBM_6,CCP_MauG,CHRD,CHU_C,Cadherin,Cadherin-like,Cadherin_3,Calx-beta,CarboxypepD_reg,Cna_B,Cohesin,Collagen_bind,Collar,DUF11,DUF1573,DUF1929,DUF285,DUF3616,DUF4214,DUF4347,DUF4394,DUF5011,DUF5122,DUF637,DUF839,Exo_endo_phos,FG-GAP,FG-GAP_2,FIVAR,FctA,Fil_haemagg,FlgD_ig,Flg_new,Fn3_assoc,GDPD,GSDH,Gly_rich,Glyco_hydro_9,Glyoxal_oxid_N,Gram_pos_anchor,HYR,Haemagg_act,He_PIG,HemolysinCabind,Kelch_1,Kelch_4,LRR_4,LRR_adjacent,LTD,LVIVD,Lactonase,Laminin_G_3,Lectin_leg-like,Lipase,Lipase_GDSL,Lipase_GDSL_2,M60-like_N,Metallophos,MucBP,Mucin_bdg,N_methyl,PA14,PKD,PPC,P_proprotein,Paired_CXXCH_1,Peptidase_C2,Peptidase_M10_C,Peptidase_M14,Peptidase_M60,Peptidase_M64,Peptidase_M8,Peptidase_S8,Peptidase_S9,Pesticin,Pput2613-deam,Pur_ac_phosph_N,Pyocin_S,RGM_C,RHS_repeat,Reprolysin_3,Reprolysin_5,SASA,SBBP,SLH,SdiA-regulated,SdrD_B,SprB,SpvB,TSP_C,TrbI,Trypsin,VCBS,VPEP,VWA,VWD,WxL,fn3
## 1504                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    CHRD
## 730  5_nucleotid_C,ASH,Autotransporter,Beta_helix,Big_2,Big_3_2,Big_3_3,Big_3_5,Big_5,CARDB,CBM60,CBM_6,CCP_MauG,CHRD,CHU_C,Cadherin,Cadherin-like,Cadherin_3,Calx-beta,CarboxypepD_reg,Cna_B,Cohesin,Collagen_bind,Collar,DUF11,DUF1573,DUF1929,DUF285,DUF3616,DUF4214,DUF4347,DUF4394,DUF5011,DUF5122,DUF637,DUF839,Exo_endo_phos,FG-GAP,FG-GAP_2,FIVAR,FctA,Fil_haemagg,FlgD_ig,Flg_new,Fn3_assoc,GDPD,GSDH,Gly_rich,Glyco_hydro_9,Glyoxal_oxid_N,Gram_pos_anchor,HYR,Haemagg_act,He_PIG,HemolysinCabind,Kelch_1,Kelch_4,LRR_4,LRR_adjacent,LTD,LVIVD,Lactonase,Laminin_G_3,Lectin_leg-like,Lipase,Lipase_GDSL,Lipase_GDSL_2,M60-like_N,Metallophos,MucBP,Mucin_bdg,N_methyl,PA14,PKD,PPC,P_proprotein,Paired_CXXCH_1,Peptidase_C2,Peptidase_M10_C,Peptidase_M14,Peptidase_M60,Peptidase_M64,Peptidase_M8,Peptidase_S8,Peptidase_S9,Pesticin,Pput2613-deam,Pur_ac_phosph_N,Pyocin_S,RGM_C,RHS_repeat,Reprolysin_3,Reprolysin_5,SASA,SBBP,SLH,SdiA-regulated,SdrD_B,SprB,SpvB,TSP_C,TrbI,Trypsin,VCBS,VPEP,VWA,VWD,WxL,fn3
## 1290                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    CHRD
## 1633                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                Fasciclin,Lipoprotein_15
## 650                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              DUF4130,UDG
## 822                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                      UDG
## 100                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     CHRD
## 250                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                 Urease_beta,Urease_gamma
##                           Species_uniref       Genus_uniref      Family_uniref
## 964                   uc_Rhodobacterales            unknown            unknown
## 1804                             unknown            unknown            unknown
## 362                   uc_Bacillariophyta uc_Bacillariophyta uc_Bacillariophyta
## 298                    Planktomarina sp.            unknown   Rhodobacteraceae
## 88                  Bathycoccus prasinos        Bathycoccus     Bathycoccaceae
## 836                              unknown            unknown            unknown
## 544                          uc_Bacteria        uc_Bacteria        uc_Bacteria
## 618               Prasinoderma singulare       Prasinoderma            unknown
## 1081                        uc_Eukaryota       uc_Eukaryota       uc_Eukaryota
## 588                 Bathycoccus prasinos        Bathycoccus     Bathycoccaceae
## 1837                             unknown      Synechococcus   Synechococcaceae
## 1169                        uc_Eukaryota       uc_Eukaryota       uc_Eukaryota
## 1183              Phaeocystis antarctica            unknown     Phaeocystaceae
## 1419                  Opitutae bacterium            unknown            unknown
## 1542         Planctomycetaceae bacterium            unknown  Planctomycetaceae
## 1375         Planctomycetaceae bacterium            unknown  Planctomycetaceae
## 1504         Planctomycetaceae bacterium            unknown  Planctomycetaceae
## 730  Planctomycetaceae bacterium TMED138            unknown  Planctomycetaceae
## 1290         Planctomycetaceae bacterium            unknown  Planctomycetaceae
## 1633              Rhodobiaceae bacterium            unknown       Rhodobiaceae
## 650               Rhodobiaceae bacterium            unknown       Rhodobiaceae
## 822                           uc_Archaea         uc_Archaea         uc_Archaea
## 100          Planctomycetaceae bacterium            unknown  Planctomycetaceae
## 250                              unknown            unknown            unknown
##            Order_uniref        Class_uniref   Phylum_uniref Domain_uniref
## 964     Rhodobacterales Alphaproteobacteria  Proteobacteria       unknown
## 1804            unknown Alphaproteobacteria  Proteobacteria       unknown
## 362  uc_Bacillariophyta  uc_Bacillariophyta Bacillariophyta       unknown
## 298     Rhodobacterales Alphaproteobacteria  Proteobacteria       unknown
## 88          Mamiellales     Mamiellophyceae     Chlorophyta Viridiplantae
## 836             unknown Gammaproteobacteria  Proteobacteria       unknown
## 544         uc_Bacteria         uc_Bacteria     uc_Bacteria   uc_Bacteria
## 618     Pelagomonadales       Pelagophyceae         unknown       unknown
## 1081       uc_Eukaryota        uc_Eukaryota    uc_Eukaryota  uc_Eukaryota
## 588         Mamiellales     Mamiellophyceae     Chlorophyta Viridiplantae
## 1837    Synechococcales             unknown   Cyanobacteria       unknown
## 1169       uc_Eukaryota        uc_Eukaryota     Chlorophyta Viridiplantae
## 1183      Phaeocystales             unknown      Haptophyta       unknown
## 1419            unknown            Opitutae  Planctomycetes       unknown
## 1542   Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 1375   Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 1504   Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 730    Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 1290   Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 1633   Hyphomicrobiales Alphaproteobacteria  Proteobacteria       unknown
## 650    Hyphomicrobiales Alphaproteobacteria  Proteobacteria       unknown
## 822          uc_Archaea          uc_Archaea      uc_Archaea    uc_Archaea
## 100    Planctomycetales      Planctomycetia  Planctomycetes       unknown
## 250             unknown             unknown  Proteobacteria       unknown
##      Domain_gtdb       Phylum_gtdb           Class_gtdb           Order_gtdb
## 964     Bacteria    Pseudomonadota  Alphaproteobacteria      Rhodobacterales
## 1804    Bacteria    Pseudomonadota  Alphaproteobacteria      Rhodobacterales
## 362     Bacteria    Actinomycetota        Actinomycetes  Propionibacteriales
## 298     Bacteria    Pseudomonadota  Alphaproteobacteria      Rhodobacterales
## 88      Bacteria      Bacteroidota          Bacteroidia      Chitinophagales
## 836     Bacteria    Pseudomonadota  Gammaproteobacteria      Pseudomonadales
## 544     Bacteria      Bacteroidota          Bacteroidia         Cytophagales
## 618     Bacteria    Actinomycetota        Actinomycetes      Actinomycetales
## 1081    Bacteria    Actinomycetota        Actinomycetes     Streptomycetales
## 588     Bacteria      Bacteroidota          Bacteroidia      Chitinophagales
## 1837    Bacteria   Cyanobacteriota       Cyanobacteriia             PCC-6307
## 1169    Bacteria    Actinomycetota        Actinomycetes      Mycobacteriales
## 1183    Bacteria    Actinomycetota        Actinomycetes     Streptomycetales
## 1419    Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 1542    Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 1375    Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 1504    Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 730     Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 1290    Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 1633    Bacteria    Pseudomonadota  Alphaproteobacteria              MED-G09
## 650     Bacteria    Pseudomonadota  Alphaproteobacteria              MED-G09
## 822      Archaea  Thermoplasmatota          Poseidoniia                MGIII
## 100     Bacteria   Planctomycetota       Planctomycetia         Pirellulales
## 250     Bacteria    Pseudomonadota  Alphaproteobacteria    Nitrososphaerales
##             Family_gtdb         Genus_gtdb                   Species_gtdb
## 964    Rhodobacteraceae             Yoonia             Yoonia sp028822885
## 1804   Rhodobacteraceae      Planktomarina      Planktomarina sp905182195
## 362     Nocardioidaceae       Nocardioides             Nocardioides antri
## 298    Rhodobacteraceae      Planktomarina      Planktomarina sp905182195
## 88       Saprospiraceae           JALQTV01           JALQTV01 sp023268675
## 836    Methylophagaceae      GCA-002733105      GCA-002733105 sp905181885
## 544   Cyclobacteriaceae            UBA4465            UBA4465 sp943788235
## 618   Microbacteriaceae             Naasia                Naasia aerilata
## 1081  Streptomycetaceae       Streptomyces       Streptomyces sp007096885
## 588      Saprospiraceae           JALQTV01           JALQTV01 sp023268675
## 1837       Cyanobiaceae  Parasynechococcus  Parasynechococcus sp003210375
## 1169   Pseudomonadaceae      Pseudomonas_E      Pseudomonas_E sp009861525
## 1183  Streptomycetaceae       Streptomyces       Streptomyces antioxidans
## 1419            UBA1268            UBA1268            UBA1268 sp014238855
## 1542            UBA1268            UBA1268            UBA1268 sp014238855
## 1375            UBA1268            UBA1268            UBA1268 sp016776925
## 1504            UBA1268            UBA1268            UBA1268 sp002862165
## 730             UBA1268            UBA1268            UBA1268 sp014238855
## 1290            UBA1268            UBA1268            UBA1268 sp002862165
## 1633            MED-G09            MED-G09            MED-G09 sp028339975
## 650             MED-G09            MED-G09            MED-G09 sp028339975
## 822             CG-Epi1            CG-Epi1            CG-Epi1 sp014240055
## 100             UBA1268            UBA1268            UBA1268 sp002862165
## 250   Nitrosopumilaceae   Nitrosopelagicus     Nitrosopumilus sp028448965
ggplot() +   geom_hline(yintercept = 0, linetype='dotted') +
  geom_vline(xintercept = 0, linetype='dotted') +
  labs(x = paste0("RDA1 (",
                  round(100 * scores$Eigenval.rel[1], 1), "%)"),
       y = paste0("RDA2 (",
                  round(100 * scores$Eigenval.rel[2], 1), "%)")) +
  theme(plot.title=element_text(hjust=0.5)) +
  geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
  geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
  geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
  geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.8, linetype="F1",color = 'forestgreen',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="forestgreen") +
  geom_text(aes(ST8AGC[,1]*1.05, ST8AGC[,2]*1.05, label=rownames(ST8AGC)), color="orange2") +
  geom_segment(data=as.data.frame(ST8AGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'orange2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
  geom_text(aes(DNiAGC[,1]*1.05, DNiAGC[,2]*1.05, label=rownames(DNiAGC)), color="gold") +
  geom_segment(data=as.data.frame(DNiAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'gold',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
  geom_text(aes(MertzAGC[,1]*1.05, MertzAGC[,2]*1.05, label=rownames(MertzAGC)), color="brown") +
  geom_segment(data=as.data.frame(MertzAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'brown',arrow = arrow(length = unit(0.2,"cm")), alpha=0.8) +
  labs(col = expression(delta^60 * Ni ~ "(‰)")) +
  theme_bw()

7.2 RDA large fraction

#. II - Attached

meta_nickel_ATT_rda = select(meta_nickel_ATT,c("ACE_seq_name","Ni_60_58_D_DELTA_BOTTLE","Ni_D_CONC_BOTTLE"))
row.names(meta_nickel_ATT_rda)=meta_nickel_ATT_rda$ACE_seq_name
meta_nickel_ATT_rda=select(meta_nickel_ATT_rda,-c("ACE_seq_name"))
meta_nickel_ATT_rda=data.frame(scale(meta_nickel_ATT_rda))

# Get rid of the AGC present in only 1 sample
GeneMat_Nickel_ATT <- GeneMat_Nickel_ATT[,-which(colSums(GeneMat_Nickel_ATT>0)<2)]

# IF USING non transformed data, need to transform the abundances to deal with compositionality
#GeneMat_Nickel_ATT <- decostand(GeneMat_Nickel_ATT, method="rclr")

#Match row names
GeneMat_Nickel_ATT_rda=GeneMat_Nickel_ATT[match(row.names(meta_nickel_ATT_rda),row.names(GeneMat_Nickel_ATT)),]

RDA=rda(GeneMat_Nickel_ATT_rda ~ ., data = meta_nickel_ATT_rda)
#summary(RDA)
RsquareAdj(RDA) #28.7%
## $r.squared
## [1] 0.3705166
## 
## $adj.r.squared
## [1] 0.2865855
anova.cca(RDA) #Significant
## Permutation test for rda under reduced model
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = GeneMat_Nickel_ATT_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_ATT_rda)
##          Df Variance      F Pr(>F)    
## Model     2   43.297 4.4145  0.001 ***
## Residual 15   73.558                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova.cca(RDA, by="axis") 
## Permutation test for rda under reduced model
## Forward tests for axes
## Permutation: free
## Number of permutations: 999
## 
## Model: rda(formula = GeneMat_Nickel_ATT_rda ~ Ni_60_58_D_DELTA_BOTTLE + Ni_D_CONC_BOTTLE, data = meta_nickel_ATT_rda)
##          Df Variance      F Pr(>F)    
## RDA1      1   33.716 6.8753  0.001 ***
## RDA2      1    9.581 1.9538  0.023 *  
## Residual 15   73.558                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
scores=scoresRDA(RDA)
## 
## -----------------------------------------------------------------------
## Site constraints (lc) selected. To obtain site scores that are weighted
## sums of species scores (default in vegan), argument site.sc must be set
## to wa.
## -----------------------------------------------------------------------
samples = scores$Sites
samples = as.data.frame(samples)
samples = merge(samples,meta_nickel_ATT, by.x="row.names", by.y="ACE_seq_name")
row.names(samples)=samples[,1]
samples=samples[,-1]

# Retrieve AGC scores
AGCScore <- scores$Species
AGCScore = as.data.frame(AGCScore)
# Keep only scores away from center
AGCScore_select = AGCScore[which(AGCScore$RDA1>0.5 | AGCScore$RDA1<(-0.5) | AGCScore$RDA2>0.5 | AGCScore$RDA2<(-0.5)),]

enviscore=scores$Biplot

#Plot the triplot
ggplot() +   geom_hline(yintercept = 0, linetype='dotted') +
  geom_vline(xintercept = 0, linetype='dotted') +
  labs(x = paste0("RDA1 (",
                  round(100 * scores$Eigenval.rel[1], 1), "%)"),
       y = paste0("RDA2 (",
                  round(100 * scores$Eigenval.rel[2], 1), "%)")) +
  theme(plot.title=element_text(hjust=0.5)) +
  geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
  geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
  geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
  geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5, linetype="F1",color = 'orange',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="orange") +
  geom_text(aes(AGCScore_select[,1]*1.05, AGCScore_select[,2]*1.05, label=rownames(AGCScore_select)), color="indianred2") +
  labs(col = "DeltaNickel60-58") +
  geom_segment(data=as.data.frame(AGCScore_select),aes(xend = RDA1, yend = RDA2),x=0,y=0,size = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm"))) +
  theme_bw() 

# A pack of AGC is associated to station 11
MertzAGC <- AGCScore[which(AGCScore$RDA1>0.25 & AGCScore$RDA2<(-0.5)),]
MertzAGC <- MertzAGC[order(MertzAGC$RDA2, decreasing = F),]
MertzAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
MertzAGCannot <- MertzAGCannot[match(row.names(MertzAGC), MertzAGCannot$AGC_ID),]

MertzGenes <- Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID %in% row.names(MertzAGC)),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_1819471"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2646602"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2848081"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_22886549"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_5722614"),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_23334844"),]
#summary(Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_2848081"),])

# One is correlated to dNi yet in between station 11 and 8
DNiAGC <- AGCScore[which(AGCScore$RDA1<0.25 & AGCScore$RDA1>(-0.25) & AGCScore$RDA2<(-0.75)),]
DNiAGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(DNiAGC)),]
#Genes_Taxo_Annot[which(Genes_Taxo_Annot$AGC_ID=="AGC_24852953"),]

# 19 are associated to station 8
ST8AGC <- AGCScore[which(AGCScore$RDA1<(-0.25) & AGCScore$RDA2<(-0.5)),]
ST8AGC <- ST8AGC[order(ST8AGC$RDA2, decreasing = F),]
ST8AGCannot <- AGC_Taxo_Annot[which(AGC_Taxo_Annot$AGC_ID %in% row.names(ST8AGC)),]
ST8AGCannot <- ST8AGCannot[match(row.names(ST8AGC), ST8AGCannot$AGC_ID),]

ggplot() +   geom_hline(yintercept = 0, linetype='dotted') +
  geom_vline(xintercept = 0, linetype='dotted') +
  labs(x = paste0("RDA1 (",
                  round(100 * scores$Eigenval.rel[1], 1), "%)"),
       y = paste0("RDA2 (",
                  round(100 * scores$Eigenval.rel[2], 1), "%)")) +
  theme(plot.title=element_text(hjust=0.5)) +
  geom_segment(data=as.data.frame(AGCScore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'indianred2',arrow = arrow(length = unit(0.2,"cm")), alpha=0.25) +
  geom_point(aes(samples[,1], samples[,2], col=samples$Ni_60_58_D_DELTA_BOTTLE), size = 5, alpha = 0.9) +
  geom_text(aes(samples[,1], samples[,2], label=samples$TM_station_number), col="white", size=3) +
  geom_segment(data=as.data.frame(enviscore),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.8, linetype="F1",color = 'forestgreen',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(enviscore[,1]*1.05, enviscore[,2]*1.05, label=rownames(enviscore)), color="forestgreen") +
  geom_text(aes(ST8AGC[,1]*1.05, ST8AGC[,2]*1.05, label=rownames(ST8AGC)), color="orange2") +
  geom_segment(data=as.data.frame(ST8AGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'orange2',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(DNiAGC[,1]*1.05, DNiAGC[,2]*1.05, label=rownames(DNiAGC)), color="gold") +
  geom_segment(data=as.data.frame(DNiAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'gold',arrow = arrow(length = unit(0.2,"cm"))) +
  geom_text(aes(MertzAGC[,1]*1.05, MertzAGC[,2]*1.05, label=rownames(MertzAGC)), color="brown") +
  geom_segment(data=as.data.frame(MertzAGC),aes(xend = RDA1, yend = RDA2),x=0,y=0,linewidth = 0.5,color = 'brown',arrow = arrow(length = unit(0.2,"cm"))) +
  labs(col = expression(delta^60 * Ni ~ "(‰)")) +
  theme_bw()